Example 1
def add_back_compatibility_info(config_dir):
    """This will be removed when python script refactoring is done."""

    common_lib.run_kaldi_command("nnet3-init {0}/ref.config "
                                 "{0}/ref.raw".format(config_dir))
    out, err = common_lib.run_kaldi_command("nnet3-info {0}/ref.raw | "
                                            "head -4".format(config_dir))
    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    info = {}
    for line in out.split("\n"):
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the back-compatible vars file
    #   model_left_context=0
    #   model_right_context=7
    #   num_hidden_layers=3
    vf = open('{0}/vars'.format(config_dir), 'w')
    vf.write('model_left_context={0}\n'.format(info['left-context']))
    vf.write('model_right_context={0}\n'.format(info['right-context']))
    vf.write('num_hidden_layers=1\n')
    vf.close()

    common_lib.force_symlink("final.config".format(config_dir),
                             "{0}/layer1.config".format(config_dir))
Example 2
def do_shrinkage(iter, model_file, shrink_saturation_threshold,
                 get_raw_nnet_from_am=True):

    if iter == 0:
        return True

    if get_raw_nnet_from_am:
        output, error = common_lib.run_kaldi_command(
            "nnet3-am-info --print-args=false {0} | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    else:
        output, error = common_lib.run_kaldi_command(
            "nnet3-info --print-args=false {0} | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    output = output.strip().split("\n")
    try:
        assert len(output) == 1
        saturation = float(output[0])
        assert saturation >= 0 and saturation <= 1
    except (AssertionError, ValueError):
        raise Exception("Something went wrong, could not get "
                        "saturation from the output '{0}' of "
                        "get_saturation.pl on the info of "
                        "model {1}".format(output, model_file))
    return (saturation > shrink_saturation_threshold)
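A short usage sketch, mirroring how the train() drivers later in this listing call this helper; the shrink value, threshold and model path below are illustrative, not taken from the original.

# Illustrative call to do_shrinkage(); all values here are hypothetical.
shrink_value = 0.99                    # e.g. the --shrink-value option
saturation_threshold = 0.40            # e.g. --shrink-saturation-threshold
model_file = "exp/chain/tdnn/10.mdl"   # hypothetical model path

shrinkage_value = (shrink_value
                   if do_shrinkage(10, model_file, saturation_threshold)
                   else 1.0)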
Example 3
def parse_prob_logs(exp_dir, key='accuracy', output="output"):
    train_prob_files = "%s/log/compute_prob_train.*.log" % (exp_dir)
    valid_prob_files = "%s/log/compute_prob_valid.*.log" % (exp_dir)
    train_prob_strings = common_lib.run_kaldi_command(
        'grep -e {0} {1}'.format(key, train_prob_files), wait=True)[0]
    valid_prob_strings = common_lib.run_kaldi_command(
        'grep -e {0} {1}'.format(key, valid_prob_files))[0]

    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:149)
    # Overall log-probability for 'output' is -0.399395 + -0.013437 = -0.412832
    # per frame, over 20000 fra

    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:144)
    # Overall log-probability for 'output' is -0.307255 per frame, over 20000
    # frames.

    parse_regex = re.compile(
        ".*compute_prob_.*\.([0-9]+).log:LOG "
        ".nnet3.*compute-prob.*:PrintTotalStats..:"
        "nnet.*diagnostics.cc:[0-9]+. Overall ([a-zA-Z\-]+) for "
        "'{output}'.*is ([0-9.\-e]+) .*per frame".format(output=output))

    train_loss = {}
    valid_loss = {}

    for line in train_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                train_loss[int(groups[0])] = groups[2]
    if not train_loss:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                " {l}".format(k=key, l=train_prob_files))

    for line in valid_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                valid_loss[int(groups[0])] = groups[2]

    if not valid_loss:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                " {l}".format(k=key, l=valid_prob_files))

    iters = list(set(valid_loss.keys()).intersection(train_loss.keys()))
    if not iters:
        raise KaldiLogParseException("Could not any common iterations with"
                " key {k} in both {tl} and {vl}".format(
                    k=key, tl=train_prob_files, vl=valid_prob_files))
    iters.sort()
    return map(lambda x: (int(x), float(train_loss[x]),
                          float(valid_loss[x])), iters)
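A consumption sketch (not in the original): each returned element is an (iteration, train_objective, valid_objective) tuple; the experiment directory below is hypothetical.

# Illustrative use of parse_prob_logs(); the directory is hypothetical.
for iteration, train_obj, valid_obj in parse_prob_logs('exp/chain/tdnn',
                                                       key='log-probability'):
    print("iter %d: train %.4f, valid %.4f" % (iteration, train_obj, valid_obj))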
Example 4
def compile(self):
    root, ext = os.path.splitext(self.pdf_file)
    dir_name = os.path.dirname(self.pdf_file)
    latex_file = root + ".tex"
    lat_file = open(latex_file, "w")
    lat_file.write("\n".join(self.document))
    lat_file.close()
    logger.info("Compiling the latex report.")
    try:
        common_lib.run_kaldi_command(
            "pdflatex -interaction=batchmode "
            "-output-directory={0} {1}".format(dir_name, latex_file))
    except Exception as e:
        logger.warning("There was an error compiling the latex file {0}, "
                       "please do it manually: {1}".format(latex_file, e))
        return False
    return True
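For orientation, a hedged sketch of the object this method presumably belongs to: only the pdf_file path and the document list of LaTeX lines are read by compile(); the class name and constructor below are hypothetical.

# Hypothetical minimal class showing the attributes compile() relies on.
class LatexReport(object):
    def __init__(self, pdf_file):
        self.pdf_file = pdf_file   # target PDF; the .tex is written next to it
        self.document = []         # list of LaTeX source lines

    def add_line(self, line):
        self.document.append(line)

    # compile(self), as defined above, would be a method of this class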
Example 5
def parse_progress_logs_for_nonlinearity_stats(exp_dir):
    """ Parse progress logs for mean and std stats for non-linearities.

    e.g. for a line that is parsed from progress.*.log:
    exp/nnet3/lstm_self_repair_ld5_sp/log/progress.9.log:component name=Lstm3_i
    type=SigmoidComponent, dim=1280, self-repair-scale=1e-05, count=1.96e+05,
    value-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.05,0.09,0.11,0.15 0.19,0.27,0.50,0.72,0.83
    0.88,0.92,0.94,0.99), mean=0.502, stddev=0.23],
    deriv-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.009,0.04,0.05,0.06 0.08,0.10,0.14,0.17,0.18
    0.19,0.20,0.20,0.21), mean=0.134, stddev=0.0397]
    """

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    stats_per_component_per_iter = {}

    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "value-avg.*deriv-avg" {0}'.format(progress_log_files))[0]

    parse_regex = re.compile(
        ".*progress.([0-9]+).log:component name=(.+) "
        "type=(.*)Component,.*"
        "value-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*"
        "deriv-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\]")

    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.502', '0.23',
        # '0.134', '0.0397')
        groups = mat_obj.groups()
        iteration = int(groups[0])
        component_name = groups[1]
        component_type = groups[2]
        value_mean = float(groups[3])
        value_stddev = float(groups[4])
        deriv_mean = float(groups[5])
        deriv_stddev = float(groups[6])
        try:
            stats_per_component_per_iter[component_name][
                'stats'][iteration] = [value_mean, value_stddev,
                                       deriv_mean, deriv_stddev]
        except KeyError:
            stats_per_component_per_iter[component_name] = {}
            stats_per_component_per_iter[component_name][
                'type'] = component_type
            stats_per_component_per_iter[component_name]['stats'] = {}
            stats_per_component_per_iter[component_name][
                'stats'][iteration] = [value_mean, value_stddev,
                                       deriv_mean, deriv_stddev]

    return stats_per_component_per_iter
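A small consumption sketch (not in the original): the returned dict maps component name to {'type': ..., 'stats': {iteration: [value_mean, value_stddev, deriv_mean, deriv_stddev]}}; the experiment directory is hypothetical.

# Illustrative use of the returned structure; the directory is hypothetical.
stats = parse_progress_logs_for_nonlinearity_stats('exp/nnet3/lstm')
for name, info in sorted(stats.items()):
    last_iter = max(info['stats'])
    value_mean, value_stddev, deriv_mean, deriv_stddev = info['stats'][last_iter]
    print("%s (%sComponent): value mean %.3f, deriv mean %.3f at iter %d"
          % (name, info['type'], value_mean, deriv_mean, last_iter))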
Example 6
def parse_train_logs(exp_dir):
    train_log_files = "%s/log/train.*.log" % (exp_dir)
    train_log_lines = common_lib.run_kaldi_command(
        'grep -e Accounting {0}'.format(train_log_files))[0]
    parse_regex = re.compile(".*train\.([0-9]+)\.([0-9]+)\.log:# "
                             "Accounting: time=([0-9]+) thread.*")

    train_times = {}
    for line in train_log_lines.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            try:
                train_times[int(groups[0])][int(groups[1])] = float(groups[2])
            except KeyError:
                train_times[int(groups[0])] = {}
                train_times[int(groups[0])][int(groups[1])] = float(groups[2])
    iters = train_times.keys()
    for iter in iters:
        values = train_times[iter].values()
        train_times[iter] = max(values)
    return train_times
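A consumption sketch (not part of the original): the return value maps iteration to the slowest thread's reported time for that iteration; the experiment directory is hypothetical.

# Illustrative use of parse_train_logs(); the directory is hypothetical.
train_times = parse_train_logs('exp/chain/tdnn')
if train_times:
    total_seconds = sum(train_times.values())
    print("%d iterations, %.1f hours total, slowest iteration %.0fs"
          % (len(train_times), total_seconds / 3600.0,
             max(train_times.values())))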
Example 7
def train(args, run_opts, background_process_handler):
    """ The main function for training.

    Args:
        args: a Namespace object with the required parameters
            obtained from the function process_args()
        run_opts: a RunOpts object obtained from process_args()
    """

    arg_string = pprint.pformat(vars(args))
    logger.info("Arguments for the experiment\n{0}".format(arg_string))

    # Check files
    chain_lib.check_for_required_files(args.feat_dir, args.tree_dir,
                                       args.lat_dir)

    # Set some variables.
    num_jobs = common_lib.get_number_of_jobs(args.tree_dir)
    feat_dim = common_lib.get_feat_dim(args.feat_dir)
    ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir)

    # split the training data into parts for individual jobs
    # we will use the same number of jobs as that used for alignment
    common_lib.split_data(args.feat_dir, num_jobs)
    shutil.copy('{0}/tree'.format(args.tree_dir), args.dir)
    with open('{0}/num_jobs'.format(args.dir), 'w') as f:
        f.write(str(num_jobs))

    config_dir = '{0}/configs'.format(args.dir)
    var_file = '{0}/vars'.format(config_dir)

    variables = common_train_lib.parse_generic_config_vars_file(var_file)

    # Set some variables.
    try:
        model_left_context = variables['model_left_context']
        model_right_context = variables['model_right_context']
        # this is really the number of times we add layers to the network for
        # discriminative pretraining
        num_hidden_layers = variables['num_hidden_layers']
    except KeyError as e:
        raise Exception("KeyError {0}: Variables need to be defined in "
                        "{1}".format(str(e), '{0}/configs'.format(args.dir)))

    left_context = args.chunk_left_context + model_left_context
    right_context = args.chunk_right_context + model_right_context

    # Initialize as "raw" nnet, prior to training the LDA-like preconditioning
    # matrix.  This first config just does any initial splicing that we do;
    # we do this as it's a convenient way to get the stats for the 'lda-like'
    # transform.
    if (args.stage <= -6):
        logger.info("Creating phone language-model")
        chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts,
                                  lm_opts=args.lm_opts)

    if (args.stage <= -5):
        logger.info("Creating denominator FST")
        chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts)

    if (args.stage <= -4):
        logger.info("Initializing a basic network for estimating "
                    "preconditioning matrix")
        common_lib.run_kaldi_command(
            """{command} {dir}/log/nnet_init.log \
                    nnet3-init --srand=-2 {dir}/configs/init.config \
                    {dir}/init.raw""".format(command=run_opts.command,
                                             dir=args.dir))

    egs_left_context = left_context + args.frame_subsampling_factor/2
    egs_right_context = right_context + args.frame_subsampling_factor/2

    default_egs_dir = '{0}/egs'.format(args.dir)
    if (args.stage <= -3) and args.egs_dir is None:
        logger.info("Generating egs")
        # this is where get_egs.sh is called.
        chain_lib.generate_chain_egs(
            dir=args.dir, data=args.feat_dir,
            lat_dir=args.lat_dir, egs_dir=default_egs_dir,
            left_context=egs_left_context,
            right_context=egs_right_context,
            run_opts=run_opts,
            left_tolerance=args.left_tolerance,
            right_tolerance=args.right_tolerance,
            frame_subsampling_factor=args.frame_subsampling_factor,
            alignment_subsampling_factor=args.alignment_subsampling_factor,
            frames_per_eg=args.chunk_width,
            srand=args.srand,
            egs_opts=args.egs_opts,
            cmvn_opts=args.cmvn_opts,
            online_ivector_dir=args.online_ivector_dir,
            frames_per_iter=args.frames_per_iter,
            transform_dir=args.transform_dir,
            stage=args.egs_stage)

    if args.egs_dir is None:
        egs_dir = default_egs_dir
    else:
        egs_dir = args.egs_dir

    [egs_left_context, egs_right_context,
     frames_per_eg, num_archives] = (
        common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim,
                                        egs_left_context, egs_right_context))
    assert(args.chunk_width == frames_per_eg)
    num_archives_expanded = num_archives * args.frame_subsampling_factor

    if (args.num_jobs_final > num_archives_expanded):
        raise Exception('num_jobs_final cannot exceed the '
                        'expanded number of archives')

    # copy the properties of the egs to dir for
    # use during decoding
    common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir)

    if (args.stage <= -2):
        logger.info('Computing the preconditioning matrix for input features')

        chain_lib.compute_preconditioning_matrix(
            args.dir, egs_dir, num_archives, run_opts,
            max_lda_jobs=args.max_lda_jobs,
            rand_prune=args.rand_prune)

    if (args.stage <= -1):
        logger.info("Preparing the initial acoustic model.")
        chain_lib.prepare_initial_acoustic_model(args.dir, run_opts)

    with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f:
        f.write(str(args.frame_subsampling_factor))

    # set num_iters so that, as closely as possible, we process the data
    # $num_epochs times, i.e. $num_iters*$avg_num_jobs ==
    # $num_epochs*$num_archives, where
    # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
    num_archives_to_process = args.num_epochs * num_archives_expanded
    num_archives_processed = 0
    num_iters = ((num_archives_to_process * 2)
                 / (args.num_jobs_initial + args.num_jobs_final))

    models_to_combine = common_train_lib.verify_iterations(
        num_iters, args.num_epochs,
        num_hidden_layers, num_archives_expanded,
        args.max_models_combine, args.add_layers_period,
        args.num_jobs_final)

    def learning_rate(iter, current_num_jobs, num_archives_processed):
        return common_train_lib.get_learning_rate(iter, current_num_jobs,
                                                  num_iters,
                                                  num_archives_processed,
                                                  num_archives_to_process,
                                                  args.initial_effective_lrate,
                                                  args.final_effective_lrate)

    min_deriv_time = None
    max_deriv_time = None
    if args.deriv_truncate_margin is not None:
        min_deriv_time = -args.deriv_truncate_margin - model_left_context
        max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin
                          + model_right_context)

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return
        current_num_jobs = int(0.5 + args.num_jobs_initial
                               + (args.num_jobs_final - args.num_jobs_initial)
                               * float(iter) / num_iters)

        if args.stage <= iter:
            model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter)
            shrinkage_value = 1.0
            if args.shrink_value != 1.0:
                shrinkage_value = (args.shrink_value
                                   if common_train_lib.do_shrinkage(
                                        iter, model_file,
                                        args.shrink_saturation_threshold)
                                   else 1
                                   )
            logger.info("On iteration {0}, learning rate is {1} and "
                        "shrink value is {2}.".format(
                            iter, learning_rate(iter, current_num_jobs,
                                                num_archives_processed),
                            shrinkage_value))

            chain_lib.train_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                egs_dir=egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=learning_rate(iter, current_num_jobs,
                                            num_archives_processed),
                shrinkage_value=shrinkage_value,
                num_chunk_per_minibatch=args.num_chunk_per_minibatch,
                num_hidden_layers=num_hidden_layers,
                add_layers_period=args.add_layers_period,
                left_context=left_context,
                right_context=right_context,
                apply_deriv_weights=args.apply_deriv_weights,
                min_deriv_time=min_deriv_time,
                max_deriv_time=max_deriv_time,
                l2_regularize=args.l2_regularize,
                xent_regularize=args.xent_regularize,
                leaky_hmm_coefficient=args.leaky_hmm_coefficient,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shuffle_buffer_size=args.shuffle_buffer_size,
                frame_subsampling_factor=args.frame_subsampling_factor,
                truncate_deriv_weights=args.truncate_deriv_weights,
                run_opts=run_opts,
                background_process_handler=background_process_handler)

            if args.cleanup:
                # clean up everything but the last 2 models, under certain
                # conditions
                common_train_lib.remove_model(
                    args.dir, iter-2, num_iters, models_to_combine,
                    args.preserve_model_interval)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # let's do some reporting
                    [report, times, data] = (
                        nnet3_log_parse.generate_accuracy_report(
                            args.dir, "log-probability"))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    if args.stage <= num_iters:
        logger.info("Doing final combination to produce final.mdl")
        chain_lib.combine_models(
            dir=args.dir, num_iters=num_iters,
            models_to_combine=models_to_combine,
            num_chunk_per_minibatch=args.num_chunk_per_minibatch,
            egs_dir=egs_dir,
            left_context=left_context, right_context=right_context,
            leaky_hmm_coefficient=args.leaky_hmm_coefficient,
            l2_regularize=args.l2_regularize,
            xent_regularize=args.xent_regularize,
            run_opts=run_opts,
            background_process_handler=background_process_handler)

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = args.remove_egs
        if args.egs_dir is not None:
            # this egs_dir was not created by this experiment so we will not
            # delete it
            remove_egs = False

        common_train_lib.clean_nnet_dir(
            args.dir, num_iters, egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs)

    # do some reporting
    [report, times, data] = nnet3_log_parse.generate_accuracy_report(
        args.dir, "log-probability")
    if args.email is not None:
        common_lib.send_mail(report, "Update : Expt {0} : "
                                     "complete".format(args.dir), args.email)

    with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f:
        f.write(report)

    common_lib.run_kaldi_command("steps/info/nnet3_dir_info.pl "
                                 "{0}".format(args.dir))
Example 8
def parse_progress_logs_for_param_diff(exp_dir, pattern):
    """ Parse progress logs for per-component parameter differences.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:LOG
    (nnet3-show-progress:main():nnet3-show-progress.cc:144) Relative parameter
    differences per layer are [ Cwrnn1_T3_W_r:0.0171537
    Cwrnn1_T3_W_x:1.33338e-07 Cwrnn1_T2_W_r:0.048075 Cwrnn1_T2_W_x:1.34088e-07
    Cwrnn1_T1_W_r:0.0157277 Cwrnn1_T1_W_x:0.0212704 Final_affine:0.0321521
    Cwrnn2_T3_W_r:0.0212082 Cwrnn2_T3_W_x:1.33691e-07 Cwrnn2_T2_W_r:0.0212978
    Cwrnn2_T2_W_x:1.33401e-07 Cwrnn2_T1_W_r:0.014976 Cwrnn2_T1_W_x:0.0233588
    Cwrnn3_T3_W_r:0.0237165 Cwrnn3_T3_W_x:1.33184e-07 Cwrnn3_T2_W_r:0.0239754
    Cwrnn3_T2_W_x:1.3296e-07 Cwrnn3_T1_W_r:0.0194809 Cwrnn3_T1_W_x:0.0271934 ]
    """

    if pattern not in set(["Relative parameter differences",
                           "Parameter differences"]):
        raise Exception("Unknown value for pattern : {0}".format(pattern))

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    progress_per_iter = {}
    component_names = set([])
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format(pattern, progress_log_files))[0]
    parse_regex = re.compile(".*progress\.([0-9]+)\.log:"
                             "LOG.*{0}.*\[(.*)\]".format(pattern))
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        groups = mat_obj.groups()
        iteration = groups[0]
        differences = parse_difference_string(groups[1])
        component_names = component_names.union(differences.keys())
        progress_per_iter[int(iteration)] = differences

    component_names = list(component_names)
    component_names.sort()
    # rearranging the parameter differences available per iter
    # into parameter differences per component
    progress_per_component = {}
    for cn in component_names:
        progress_per_component[cn] = {}

    max_iter = max(progress_per_iter.keys())
    total_missing_iterations = 0
    gave_user_warning = False
    for iter in range(max_iter + 1):
        try:
            component_dict = progress_per_iter[iter]
        except KeyError:
            continue

        for component_name in component_names:
            try:
                progress_per_component[component_name][iter] = component_dict[
                    component_name]
            except KeyError:
                total_missing_iterations += 1
                # the component was not found in this iteration, maybe because
                # of layer-wise discriminative training
                pass
        if (total_missing_iterations/len(component_names) > 20
                and not gave_user_warning and logger is not None):
            logger.warning("There are more than {0} missing iterations per "
                           "component. Something might be wrong.".format(
                                total_missing_iterations/len(component_names)))
            gave_user_warning = True

    return {'progress_per_component': progress_per_component,
            'component_names': component_names,
            'max_iter': max_iter}
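A consumption sketch (not in the original), using the experiment directory from the docstring above as an example path.

# Illustrative use of parse_progress_logs_for_param_diff().
diffs = parse_progress_logs_for_param_diff('exp/chain/cwrnn_trial2_ld5_sp',
                                           'Relative parameter differences')
for name in diffs['component_names']:
    per_iter = diffs['progress_per_component'][name]
    if per_iter:
        last = max(per_iter)
        print("%s: relative diff %s at iter %d" % (name, per_iter[last], last))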
Example 9
def parse_progress_logs_for_clipped_proportion(exp_dir):
    """ Parse progress logs for clipped proportion stats.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:component
    name=BLstm1_forward_c type=ClipGradientComponent, dim=512,
    norm-based-clipping=true, clipping-threshold=30,
    clipped-proportion=0.000565527,
    self-repair-clipped-proportion-threshold=0.01, self-repair-target=0,
    self-repair-scale=1
    """

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    component_names = set([])
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format(
            "clipped-proportion", progress_log_files))[0]
    parse_regex = re.compile(".*progress\.([0-9]+)\.log:component "
                             "name=(.*) type=.* "
                             "clipped-proportion=([0-9\.e\-]+)")

    cp_per_component_per_iter = {}

    max_iteration = 0
    component_names = set([])
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            if line.strip() == "":
                continue
            raise MalformedClippedProportionLineException(line)
        groups = mat_obj.groups()
        iteration = int(groups[0])
        max_iteration = max(max_iteration, iteration)
        name = groups[1]
        clipped_proportion = float(groups[2])
        if clipped_proportion > 1:
            raise MalformedClippedProportionLineException(line)
        if iteration not in cp_per_component_per_iter:
            cp_per_component_per_iter[iteration] = {}
        cp_per_component_per_iter[iteration][name] = clipped_proportion
        component_names.add(name)
    component_names = list(component_names)
    component_names.sort()

    # rearranging the data into an array
    # and into a cp_per_iter_per_component
    cp_per_iter_per_component = {}
    for component_name in component_names:
        cp_per_iter_per_component[component_name] = []
    data = []
    data.append(["iteration"]+component_names)
    for iter in range(max_iteration+1):
        if iter not in cp_per_component_per_iter:
            continue
        comp_dict = cp_per_component_per_iter[iter]
        row = [iter]
        for component in component_names:
            try:
                row.append(comp_dict[component])
                cp_per_iter_per_component[component].append(
                    [iter, comp_dict[component]])
            except KeyError:
                # if clipped proportion is not available for a particular
                # component it is set to None
                # this usually happens during layer-wise discriminative
                # training
                row.append(None)
        data.append(row)

    return {'table': data,
            'cp_per_component_per_iter': cp_per_component_per_iter,
            'cp_per_iter_per_component': cp_per_iter_per_component}
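A consumption sketch (not in the original): the 'table' entry is row-oriented, a header row followed by one row per iteration, so it can be written out as CSV directly; the experiment directory is hypothetical.

# Illustrative: write the per-iteration clipped-proportion table as CSV.
import csv
import sys

result = parse_progress_logs_for_clipped_proportion('exp/chain/blstm')
writer = csv.writer(sys.stdout)
for row in result['table']:
    writer.writerow(row)   # missing components appear as empty cells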
Example 10
def train(args, run_opts, background_process_handler):
    """ The main function for training.

    Args:
        args: a Namespace object with the required parameters
            obtained from the function process_args()
        run_opts: a RunOpts object obtained from process_args()
    """

    arg_string = pprint.pformat(vars(args))
    logger.info("Arguments for the experiment\n{0}".format(arg_string))

    # Check files
    chain_lib.check_for_required_files(args.feat_dir, args.tree_dir,
                                       args.lat_dir)

    # Set some variables.
    num_jobs = common_lib.get_number_of_jobs(args.tree_dir)
    feat_dim = common_lib.get_feat_dim(args.feat_dir)
    ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir)
    ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir)

    # split the training data into parts for individual jobs
    # we will use the same number of jobs as that used for alignment
    common_lib.split_data(args.feat_dir, num_jobs)
    shutil.copy('{0}/tree'.format(args.tree_dir), args.dir)
    with open('{0}/num_jobs'.format(args.dir), 'w') as f:
        f.write(str(num_jobs))

    config_dir = '{0}/configs'.format(args.dir)
    var_file = '{0}/vars'.format(config_dir)

    variables = common_train_lib.parse_generic_config_vars_file(var_file)

    # Set some variables.
    try:
        model_left_context = variables['model_left_context']
        model_right_context = variables['model_right_context']
        # this is really the number of times we add layers to the network for
        # discriminative pretraining
        num_hidden_layers = variables['num_hidden_layers']
    except KeyError as e:
        raise Exception("KeyError {0}: Variables need to be defined in "
                        "{1}".format(str(e), '{0}/configs'.format(args.dir)))

    left_context = args.chunk_left_context + model_left_context
    right_context = args.chunk_right_context + model_right_context
    left_context_initial = (args.chunk_left_context_initial +
                            model_left_context
                            if args.chunk_left_context_initial >= 0 else -1)
    right_context_final = (args.chunk_right_context_final + model_right_context
                           if args.chunk_right_context_final >= 0 else -1)

    # Initialize as "raw" nnet, prior to training the LDA-like preconditioning
    # matrix.  This first config just does any initial splicing that we do;
    # we do this as it's a convenient way to get the stats for the 'lda-like'
    # transform.
    if (args.stage <= -6):
        logger.info("Creating phone language-model")
        chain_lib.create_phone_lm(args.dir,
                                  args.tree_dir,
                                  run_opts,
                                  lm_opts=args.lm_opts)

    if (args.stage <= -5):
        logger.info("Creating denominator FST")
        chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts)

    if (args.stage <= -4):
        logger.info("Initializing a basic network for estimating "
                    "preconditioning matrix")
        common_lib.run_kaldi_command("""{command} {dir}/log/nnet_init.log \
                    nnet3-init --srand=-2 {dir}/configs/init.config \
                    {dir}/init.raw""".format(command=run_opts.command,
                                             dir=args.dir))

    egs_left_context = left_context + args.frame_subsampling_factor / 2
    egs_right_context = right_context + args.frame_subsampling_factor / 2
    egs_left_context_initial = (left_context_initial +
                                args.frame_subsampling_factor / 2
                                if left_context_initial >= 0 else -1)
    egs_right_context_final = (right_context_final +
                               args.frame_subsampling_factor / 2
                               if right_context_final >= 0 else -1)

    default_egs_dir = '{0}/egs'.format(args.dir)
    if (args.stage <= -3) and args.egs_dir is None:
        logger.info("Generating egs")
        # this is where get_egs.sh is called.
        chain_lib.generate_chain_egs(
            dir=args.dir,
            data=args.feat_dir,
            lat_dir=args.lat_dir,
            egs_dir=default_egs_dir,
            left_context=egs_left_context,
            right_context=egs_right_context,
            left_context_initial=egs_left_context_initial,
            right_context_final=egs_right_context_final,
            run_opts=run_opts,
            left_tolerance=args.left_tolerance,
            right_tolerance=args.right_tolerance,
            frame_subsampling_factor=args.frame_subsampling_factor,
            alignment_subsampling_factor=args.alignment_subsampling_factor,
            frames_per_eg_str=args.chunk_width,
            srand=args.srand,
            egs_opts=args.egs_opts,
            cmvn_opts=args.cmvn_opts,
            online_ivector_dir=args.online_ivector_dir,
            frames_per_iter=args.frames_per_iter,
            transform_dir=args.transform_dir,
            stage=args.egs_stage)

    if args.egs_dir is None:
        egs_dir = default_egs_dir
    else:
        egs_dir = args.egs_dir

    [egs_left_context, egs_right_context, frames_per_eg_str,
     num_archives] = (common_train_lib.verify_egs_dir(
         egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context,
         egs_right_context, egs_left_context_initial, egs_right_context_final))
    assert (args.chunk_width == frames_per_eg_str)
    num_archives_expanded = num_archives * args.frame_subsampling_factor

    if (args.num_jobs_final > num_archives_expanded):
        raise Exception('num_jobs_final cannot exceed the '
                        'expanded number of archives')

    # copy the properties of the egs to dir for
    # use during decoding
    logger.info("Copying the properties from {0} to {1}".format(
        egs_dir, args.dir))
    common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir)

    if (args.stage <= -2):
        logger.info('Computing the preconditioning matrix for input features')

        chain_lib.compute_preconditioning_matrix(
            args.dir,
            egs_dir,
            num_archives,
            run_opts,
            max_lda_jobs=args.max_lda_jobs,
            rand_prune=args.rand_prune)

    if (args.stage <= -1):
        logger.info("Preparing the initial acoustic model.")
        chain_lib.prepare_initial_acoustic_model(args.dir, run_opts)

    with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f:
        f.write(str(args.frame_subsampling_factor))

    # set num_iters so that, as closely as possible, we process the data
    # $num_epochs times, i.e. $num_iters*$avg_num_jobs ==
    # $num_epochs*$num_archives, where
    # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
    num_archives_to_process = int(args.num_epochs * num_archives_expanded)
    num_archives_processed = 0
    num_iters = ((num_archives_to_process * 2) /
                 (args.num_jobs_initial + args.num_jobs_final))

    models_to_combine = common_train_lib.verify_iterations(
        num_iters, args.num_epochs, num_hidden_layers, num_archives_expanded,
        args.max_models_combine, args.add_layers_period, args.num_jobs_final)

    def learning_rate(iter, current_num_jobs, num_archives_processed):
        return common_train_lib.get_learning_rate(iter, current_num_jobs,
                                                  num_iters,
                                                  num_archives_processed,
                                                  num_archives_to_process,
                                                  args.initial_effective_lrate,
                                                  args.final_effective_lrate)

    min_deriv_time = None
    max_deriv_time_relative = None
    if args.deriv_truncate_margin is not None:
        min_deriv_time = -args.deriv_truncate_margin - model_left_context
        max_deriv_time_relative = \
           args.deriv_truncate_margin + model_right_context

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return
        current_num_jobs = int(0.5 + args.num_jobs_initial +
                               (args.num_jobs_final - args.num_jobs_initial) *
                               float(iter) / num_iters)

        if args.stage <= iter:
            model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter)
            shrinkage_value = 1.0
            if args.shrink_value != 1.0:
                shrinkage_value = (
                    args.shrink_value if common_train_lib.do_shrinkage(
                        iter, model_file,
                        args.shrink_saturation_threshold) else 1)

            chain_lib.train_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                egs_dir=egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=learning_rate(iter, current_num_jobs,
                                            num_archives_processed),
                dropout_edit_string=common_train_lib.get_dropout_edit_string(
                    args.dropout_schedule,
                    float(num_archives_processed) / num_archives_to_process,
                    iter),
                shrinkage_value=shrinkage_value,
                num_chunk_per_minibatch_str=args.num_chunk_per_minibatch,
                num_hidden_layers=num_hidden_layers,
                add_layers_period=args.add_layers_period,
                left_context=left_context,
                right_context=right_context,
                apply_deriv_weights=args.apply_deriv_weights,
                min_deriv_time=min_deriv_time,
                max_deriv_time_relative=max_deriv_time_relative,
                l2_regularize=args.l2_regularize,
                xent_regularize=args.xent_regularize,
                leaky_hmm_coefficient=args.leaky_hmm_coefficient,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shuffle_buffer_size=args.shuffle_buffer_size,
                frame_subsampling_factor=args.frame_subsampling_factor,
                run_opts=run_opts,
                background_process_handler=background_process_handler)

            if args.cleanup:
                # clean up everything but the last 2 models, under certain
                # conditions
                common_train_lib.remove_model(args.dir, iter - 2, num_iters,
                                              models_to_combine,
                                              args.preserve_model_interval)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # let's do some reporting
                    [report, times,
                     data] = (nnet3_log_parse.generate_acc_logprob_report(
                         args.dir, "log-probability"))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    if args.stage <= num_iters:
        logger.info("Doing final combination to produce final.mdl")
        chain_lib.combine_models(
            dir=args.dir,
            num_iters=num_iters,
            models_to_combine=models_to_combine,
            num_chunk_per_minibatch_str=args.num_chunk_per_minibatch,
            egs_dir=egs_dir,
            left_context=left_context,
            right_context=right_context,
            leaky_hmm_coefficient=args.leaky_hmm_coefficient,
            l2_regularize=args.l2_regularize,
            xent_regularize=args.xent_regularize,
            run_opts=run_opts,
            background_process_handler=background_process_handler,
            sum_to_one_penalty=args.combine_sum_to_one_penalty)

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = args.remove_egs
        if args.egs_dir is not None:
            # this egs_dir was not created by this experiment so we will not
            # delete it
            remove_egs = False

        common_train_lib.clean_nnet_dir(
            args.dir,
            num_iters,
            egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs)

    # do some reporting
    [report, times, data
     ] = nnet3_log_parse.generate_acc_logprob_report(args.dir,
                                                     "log-probability")
    if args.email is not None:
        common_lib.send_mail(
            report, "Update : Expt {0} : "
            "complete".format(args.dir), args.email)

    with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f:
        f.write(report)

    common_lib.run_kaldi_command("steps/info/nnet3_dir_info.pl "
                                 "{0}".format(args.dir))
Example 11
def parse_progress_logs_for_param_diff(exp_dir, pattern):
    """ Parse progress logs for per-component parameter differences.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:LOG
    (nnet3-show-progress:main():nnet3-show-progress.cc:144) Relative parameter
    differences per layer are [ Cwrnn1_T3_W_r:0.0171537
    Cwrnn1_T3_W_x:1.33338e-07 Cwrnn1_T2_W_r:0.048075 Cwrnn1_T2_W_x:1.34088e-07
    Cwrnn1_T1_W_r:0.0157277 Cwrnn1_T1_W_x:0.0212704 Final_affine:0.0321521
    Cwrnn2_T3_W_r:0.0212082 Cwrnn2_T3_W_x:1.33691e-07 Cwrnn2_T2_W_r:0.0212978
    Cwrnn2_T2_W_x:1.33401e-07 Cwrnn2_T1_W_r:0.014976 Cwrnn2_T1_W_x:0.0233588
    Cwrnn3_T3_W_r:0.0237165 Cwrnn3_T3_W_x:1.33184e-07 Cwrnn3_T2_W_r:0.0239754
    Cwrnn3_T2_W_x:1.3296e-07 Cwrnn3_T1_W_r:0.0194809 Cwrnn3_T1_W_x:0.0271934 ]
    """

    if pattern not in set(
        ["Relative parameter differences", "Parameter differences"]):
        raise Exception("Unknown value for pattern : {0}".format(pattern))

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    progress_per_iter = {}
    component_names = set([])
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format(pattern, progress_log_files))[0]
    parse_regex = re.compile(".*progress\.([0-9]+)\.log:"
                             "LOG.*{0}.*\[(.*)\]".format(pattern))
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        groups = mat_obj.groups()
        iteration = groups[0]
        differences = parse_difference_string(groups[1])
        component_names = component_names.union(differences.keys())
        progress_per_iter[int(iteration)] = differences

    component_names = list(component_names)
    component_names.sort()
    # rearranging the parameter differences available per iter
    # into parameter differences per component
    progress_per_component = {}
    for cn in component_names:
        progress_per_component[cn] = {}

    max_iter = max(progress_per_iter.keys())
    total_missing_iterations = 0
    gave_user_warning = False
    for iter in range(max_iter + 1):
        try:
            component_dict = progress_per_iter[iter]
        except KeyError:
            continue

        for component_name in component_names:
            try:
                progress_per_component[component_name][iter] = component_dict[
                    component_name]
            except KeyError:
                total_missing_iterations += 1
                # the component was not found in this iteration, maybe because
                # of layer-wise discriminative training
                pass
        if (total_missing_iterations / len(component_names) > 20
                and not gave_user_warning and logger is not None):
            logger.warning(
                "There are more than {0} missing iterations per "
                "component. Something might be wrong.".format(
                    total_missing_iterations / len(component_names)))
            gave_user_warning = True

    return {
        'progress_per_component': progress_per_component,
        'component_names': component_names,
        'max_iter': max_iter
    }
Example 12
def parse_progress_logs_for_clipped_proportion(exp_dir):
    """ Parse progress logs for clipped proportion stats.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:component
    name=BLstm1_forward_c type=ClipGradientComponent, dim=512,
    norm-based-clipping=true, clipping-threshold=30,
    clipped-proportion=0.000565527,
    self-repair-clipped-proportion-threshold=0.01, self-repair-target=0,
    self-repair-scale=1
    """

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    component_names = set([])
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format("clipped-proportion",
                                   progress_log_files))[0]
    parse_regex = re.compile(".*progress\.([0-9]+)\.log:component "
                             "name=(.*) type=.* "
                             "clipped-proportion=([0-9\.e\-]+)")

    cp_per_component_per_iter = {}

    max_iteration = 0
    component_names = set([])
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            if line.strip() == "":
                continue
            raise MalformedClippedProportionLineException(line)
        groups = mat_obj.groups()
        iteration = int(groups[0])
        max_iteration = max(max_iteration, iteration)
        name = groups[1]
        clipped_proportion = float(groups[2])
        if clipped_proportion > 1:
            raise MalformedClippedProportionLineException(line)
        if iteration not in cp_per_component_per_iter:
            cp_per_component_per_iter[iteration] = {}
        cp_per_component_per_iter[iteration][name] = clipped_proportion
        component_names.add(name)
    component_names = list(component_names)
    component_names.sort()

    # rearranging the data into an array
    # and into a cp_per_iter_per_component
    cp_per_iter_per_component = {}
    for component_name in component_names:
        cp_per_iter_per_component[component_name] = []
    data = []
    data.append(["iteration"] + component_names)
    for iter in range(max_iteration + 1):
        if iter not in cp_per_component_per_iter:
            continue
        comp_dict = cp_per_component_per_iter[iter]
        row = [iter]
        for component in component_names:
            try:
                row.append(comp_dict[component])
                cp_per_iter_per_component[component].append(
                    [iter, comp_dict[component]])
            except KeyError:
                # if clipped proportion is not available for a particular
                # component it is set to None
                # this usually happens during layer-wise discriminative
                # training
                row.append(None)
        data.append(row)

    return {
        'table': data,
        'cp_per_component_per_iter': cp_per_component_per_iter,
        'cp_per_iter_per_component': cp_per_iter_per_component
    }