def train(args, run_opts, background_process_handler): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Set some variables. # num_leaves = common_lib.get_number_of_leaves_from_tree(args.ali_dir) num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.split_data(args.feat_dir, num_jobs) shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] # this is really the number of times we add layers to the network for # discriminative pretraining num_hidden_layers = variables['num_hidden_layers'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
if (args.stage <= -5): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.run_job( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) default_egs_dir = '{0}/egs'.format(args.dir) if (args.stage <= -4) and args.egs_dir is None: logger.info("Generating egs") train_lib.acoustic_model.generate_egs( data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, frames_per_eg=args.frames_per_eg, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, samples_per_iter=args.samples_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, left_context, right_context)) assert(args.frames_per_eg == frames_per_eg) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' 'in the egs directory') # copy the properties of the egs to dir for # use during decoding common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if (args.stage <= -3): logger.info('Computing the preconditioning matrix for input features') train_lib.common.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -2): logger.info("Computing initial vector for FixedScaleComponent before" " softmax, using priors^{prior_scale} and rescaling to" " average 1".format( prior_scale=args.presoftmax_prior_scale_power)) common_train_lib.compute_presoftmax_prior_scale( args.dir, args.ali_dir, num_jobs, run_opts, presoftmax_prior_scale_power=args.presoftmax_prior_scale_power) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") train_lib.acoustic_model.prepare_initial_acoustic_model( args.dir, args.ali_dir, run_opts) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
    num_archives_expanded = num_archives * args.frames_per_eg
    num_archives_to_process = int(args.num_epochs * num_archives_expanded)
    num_archives_processed = 0
    num_iters = ((num_archives_to_process * 2)
                 // (args.num_jobs_initial + args.num_jobs_final))

    models_to_combine = common_train_lib.verify_iterations(
        num_iters, args.num_epochs, num_hidden_layers,
        num_archives_expanded, args.max_models_combine,
        args.add_layers_period, args.num_jobs_final)

    def learning_rate(iter, current_num_jobs, num_archives_processed):
        return common_train_lib.get_learning_rate(iter, current_num_jobs,
                                                  num_iters,
                                                  num_archives_processed,
                                                  num_archives_to_process,
                                                  args.initial_effective_lrate,
                                                  args.final_effective_lrate)

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return
        current_num_jobs = int(0.5 + args.num_jobs_initial
                               + (args.num_jobs_final - args.num_jobs_initial)
                               * float(iter) / num_iters)

        if args.stage <= iter:
            logger.info("On iteration {0}, learning rate is {1}.".format(
                iter, learning_rate(iter, current_num_jobs,
                                    num_archives_processed)))

            train_lib.common.train_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                egs_dir=egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=learning_rate(iter, current_num_jobs,
                                            num_archives_processed),
                minibatch_size=args.minibatch_size,
                frames_per_eg=args.frames_per_eg,
                num_hidden_layers=num_hidden_layers,
                add_layers_period=args.add_layers_period,
                left_context=left_context,
                right_context=right_context,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shuffle_buffer_size=args.shuffle_buffer_size,
                run_opts=run_opts,
                background_process_handler=background_process_handler)

            if args.cleanup:
                # clean up everything but the last 2 models, under certain
                # conditions
                common_train_lib.remove_model(
                    args.dir, iter-2, num_iters, models_to_combine,
                    args.preserve_model_interval)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # let's do some reporting
                    [report, times, data] = (
                        nnet3_log_parse.generate_accuracy_report(args.dir))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    if args.stage <= num_iters:
        logger.info("Doing final combination to produce final.mdl")
        train_lib.common.combine_models(
            dir=args.dir, num_iters=num_iters,
            models_to_combine=models_to_combine, egs_dir=egs_dir,
            left_context=left_context, right_context=right_context,
            run_opts=run_opts,
            background_process_handler=background_process_handler)

    if args.stage <= num_iters + 1:
        logger.info("Getting average posterior for purposes of "
                    "adjusting the priors.")
        avg_post_vec_file = train_lib.common.compute_average_posterior(
            dir=args.dir, iter='combined', egs_dir=egs_dir,
            num_archives=num_archives, left_context=left_context,
            right_context=right_context,
            prior_subset_size=args.prior_subset_size, run_opts=run_opts)

        logger.info("Re-adjusting priors based on computed posteriors")
        combined_model = "{dir}/combined.mdl".format(dir=args.dir)
        final_model = "{dir}/final.mdl".format(dir=args.dir)
        train_lib.common.adjust_am_priors(args.dir, combined_model,
                                          avg_post_vec_file, final_model,
                                          run_opts)

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = args.remove_egs

        if args.egs_dir is not None:
            # this egs_dir was not created by this experiment so we will not
            # delete it
            remove_egs = False

        common_train_lib.clean_nnet_dir(
            nnet_dir=args.dir, num_iters=num_iters, egs_dir=egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs)

    # do some reporting
    [report, times, data] = nnet3_log_parse.generate_accuracy_report(args.dir)
    if args.email is not None:
        common_lib.send_mail(report, "Update : Expt {0} : "
                                     "complete".format(args.dir), args.email)

    with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f:
        f.write(report)

    common_lib.run_job("steps/info/nnet3_dir_info.pl "
                       "{0}".format(args.dir))
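

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): how the iteration
# count used above follows from the epoch/job settings.  The helper name and
# all numbers are made up for illustration only.
def _example_num_iters(num_archives_expanded=300, num_epochs=4,
                       num_jobs_initial=2, num_jobs_final=8):
    # We want num_iters * avg_num_jobs ~= num_epochs * num_archives_expanded,
    # where avg_num_jobs = (num_jobs_initial + num_jobs_final) / 2.
    num_archives_to_process = int(num_epochs * num_archives_expanded)
    return ((num_archives_to_process * 2)
            // (num_jobs_initial + num_jobs_final))
# e.g. _example_num_iters() == 240: 1200 archive passes spread over an
# average of 5 parallel jobs per iteration.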
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files( args.feat_dir, args.tree_dir, args.lat_dir if args.egs_dir is None else None) # Copy phones.txt from tree-dir to dir. Later, steps/nnet3/decode.sh will # use it to check compatibility between training and decoding phone-sets. shutil.copy('{0}/phones.txt'.format(args.tree_dir), args.dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. if (args.stage <= -6): logger.info("Creating phone language-model") chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts, lm_opts=args.lm_opts) if (args.stage <= -5): logger.info("Creating denominator FST") shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts) if ((args.stage <= -4) and os.path.exists("{0}/configs/init.config".format(args.dir)) and (args.input_model is None)): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.execute_command("""{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) egs_left_context = left_context + args.frame_subsampling_factor // 2 egs_right_context = right_context + args.frame_subsampling_factor // 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. 
egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor // 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor // 2 if right_context_final >= 0 else -1) default_egs_dir = '{0}/egs'.format(args.dir) if ((args.stage <= -3) and args.egs_dir is None): logger.info("Generating egs") if (not os.path.exists("{0}/den.fst".format(args.dir)) or not os.path.exists("{0}/normalization.fst".format(args.dir)) or not os.path.exists("{0}/tree".format(args.dir))): raise Exception("Chain egs generation expects {0}/den.fst, " "{0}/normalization.fst and {0}/tree " "to exist.".format(args.dir)) # this is where get_egs.sh is called. chain_lib.generate_chain_egs( dir=args.dir, data=args.feat_dir, lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, left_context_initial=egs_left_context_initial, right_context_final=egs_right_context_final, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, frames_per_iter=args.frames_per_iter, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = (common_train_lib.verify_egs_dir( egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) assert (args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding logger.info("Copying the properties from {0} to {1}".format( egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if not os.path.exists('{0}/valid_diagnostic.cegs'.format(egs_dir)): if (not os.path.exists('{0}/valid_diagnostic.scp'.format(egs_dir))): raise Exception('Neither {0}/valid_diagnostic.cegs nor ' '{0}/valid_diagnostic.scp exist.' 'This script expects one of them.'.format(egs_dir)) use_multitask_egs = True else: use_multitask_egs = False if ((args.stage <= -2) and (os.path.exists(args.dir + "/configs/init.config")) and (args.input_model is None)): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune, use_multitask_egs=use_multitask_egs) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts, input_model=args.input_model) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) // (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. if args.do_final_combination: models_to_combine = common_train_lib.get_model_combine_iters( num_iters, args.num_epochs, num_archives_expanded, args.max_models_combine, args.num_jobs_final) else: models_to_combine = None min_deriv_time = None max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time_relative = \ args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter) lrate = common_train_lib.get_learning_rate( iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) shrinkage_value = 1.0 - (args.proportional_shrink * lrate) if shrinkage_value <= 0.5: raise Exception( "proportional-shrink={0} is too large, it gives " "shrink-value={1}".format(args.proportional_shrink, shrinkage_value)) if args.shrink_value < shrinkage_value: shrinkage_value = ( args.shrink_value if common_train_lib.should_do_shrinkage( iter, model_file, args.shrink_saturation_threshold) else shrinkage_value) percent = num_archives_processed * 100.0 / num_archives_to_process epoch = (num_archives_processed * args.num_epochs / num_archives_to_process) shrink_info_str = '' if shrinkage_value != 1.0: shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value) logger.info("Iter: {0}/{1} " "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) " "lr: {5:0.6f} {6}".format(iter, num_iters - 1, epoch, args.num_epochs, percent, lrate, shrink_info_str)) chain_lib.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=lrate, dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), train_opts=' '.join(args.train_opts), shrinkage_value=shrinkage_value, num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, max_deriv_time_relative=max_deriv_time_relative, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, run_opts=run_opts, backstitch_training_scale=args.backstitch_training_scale, backstitch_training_interval=args.backstitch_training_interval, use_multitask_egs=use_multitask_egs) if args.cleanup: # do a clean up everything but the last 2 models, under certain # conditions common_train_lib.remove_model(args.dir, iter - 2, num_iters, models_to_combine, 
args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = (nnet3_log_parse.generate_acc_logprob_report( args.dir, "log-probability")) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: if args.do_final_combination: logger.info("Doing final combination to produce final.mdl") chain_lib.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, egs_dir=egs_dir, leaky_hmm_coefficient=args.leaky_hmm_coefficient, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, max_objective_evaluations=args.max_objective_evaluations, use_multitask_egs=use_multitask_egs) else: logger.info("Copying the last-numbered model to final.mdl") common_lib.force_symlink("{0}.mdl".format(num_iters), "{0}/final.mdl".format(args.dir)) chain_lib.compute_train_cv_probabilities( dir=args.dir, iter=num_iters, egs_dir=egs_dir, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, run_opts=run_opts, use_multitask_egs=use_multitask_egs) common_lib.force_symlink( "compute_prob_valid.{iter}.log" "".format(iter=num_iters), "{dir}/log/compute_prob_valid.final.log".format(dir=args.dir)) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False # leave the last-two-numbered models, for diagnostic reasons. common_train_lib.clean_nnet_dir( args.dir, num_iters - 1, egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data ] = nnet3_log_parse.generate_acc_logprob_report(args.dir, "log-probability") if args.email is not None: common_lib.send_mail( report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.execute_command("steps/info/chain_dir_info.pl " "{0}".format(args.dir))
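

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): the per-iteration
# shrinkage value computed above, with made-up numbers.  This is simplified:
# in the script the configured shrink-value is only applied when
# should_do_shrinkage() reports a saturated model, which is omitted here.
def _example_shrinkage(lrate=0.002, proportional_shrink=20.0,
                       shrink_value=0.99):
    value = 1.0 - proportional_shrink * lrate   # e.g. 1.0 - 0.04 = 0.96
    if value <= 0.5:
        raise ValueError("proportional-shrink is too large for this lrate")
    # the configured shrink-value can only make the shrinkage stronger
    return min(value, shrink_value)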
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Copy phones.txt from ali-dir to dir. Later, steps/nnet3/decode.sh will # use it to check compatibility between training and decoding phone-sets. shutil.copy('{0}/phones.txt'.format(args.ali_dir), args.dir) # Set some variables. # num_leaves = common_lib.get_number_of_leaves_from_tree(args.ali_dir) num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}".format( args.feat_dir, num_jobs)) shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = model_left_context right_context = model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
    if ((args.stage <= -5)
            and os.path.exists(args.dir + "/configs/init.config")
            and (args.input_model is None)):
        logger.info("Initializing a basic network for estimating "
                    "preconditioning matrix")
        common_lib.execute_command(
            """{command} {dir}/log/nnet_init.log \
                    nnet3-init --srand=-2 {dir}/configs/init.config \
                    {dir}/init.raw""".format(command=run_opts.command,
                                             dir=args.dir))

    default_egs_dir = '{0}/egs'.format(args.dir)
    if (args.stage <= -4) and args.egs_dir is None:
        logger.info("Generating egs")
        if args.feat_dir is None:
            raise Exception(
                "--feat-dir option is required if you don't supply --egs-dir")

        train_lib.acoustic_model.generate_egs(
            data=args.feat_dir, alidir=args.ali_dir,
            egs_dir=default_egs_dir,
            left_context=left_context, right_context=right_context,
            run_opts=run_opts,
            frames_per_eg_str=str(args.frames_per_eg),
            srand=args.srand,
            egs_opts=args.egs_opts,
            cmvn_opts=args.cmvn_opts,
            online_ivector_dir=args.online_ivector_dir,
            samples_per_iter=args.samples_per_iter,
            stage=args.egs_stage)

    if args.egs_dir is None:
        egs_dir = default_egs_dir
    else:
        egs_dir = args.egs_dir

    [egs_left_context, egs_right_context,
     frames_per_eg_str, num_archives] = (
         common_train_lib.verify_egs_dir(egs_dir, feat_dim,
                                         ivector_dim, ivector_id,
                                         left_context, right_context))
    assert str(args.frames_per_eg) == frames_per_eg_str

    if args.num_jobs_final > num_archives:
        raise Exception('num_jobs_final cannot exceed the number of archives '
                        'in the egs directory')

    # copy the properties of the egs to dir for
    # use during decoding
    common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir)

    if (args.stage <= -3
            and os.path.exists(args.dir + "/configs/init.config")
            and (args.input_model is None)):
        logger.info('Computing the preconditioning matrix for input features')

        train_lib.common.compute_preconditioning_matrix(
            args.dir, egs_dir, num_archives, run_opts,
            max_lda_jobs=args.max_lda_jobs,
            rand_prune=args.rand_prune)

    if args.stage <= -2 and (args.input_model is None):
        logger.info(
            "Computing initial vector for FixedScaleComponent before"
            " softmax, using priors^{prior_scale} and rescaling to"
            " average 1".format(prior_scale=args.presoftmax_prior_scale_power))

        common_train_lib.compute_presoftmax_prior_scale(
            args.dir, args.ali_dir, num_jobs, run_opts,
            presoftmax_prior_scale_power=args.presoftmax_prior_scale_power)

    if args.stage <= -1:
        logger.info("Preparing the initial acoustic model.")
        train_lib.acoustic_model.prepare_initial_acoustic_model(
            args.dir, args.ali_dir, run_opts,
            input_model=args.input_model)

    # set num_iters so that as close as possible, we process the data
    # $num_epochs times, i.e. $num_iters*$avg_num_jobs ==
    # $num_epochs*$num_archives, where
    # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
    num_archives_expanded = num_archives * args.frames_per_eg
    num_archives_to_process = int(args.num_epochs * num_archives_expanded)
    num_archives_processed = 0
    num_iters = ((num_archives_to_process * 2)
                 // (args.num_jobs_initial + args.num_jobs_final))

    # If do_final_combination is True, compute the set of models_to_combine.
    # Otherwise, models_to_combine will be none.
    if args.do_final_combination:
        models_to_combine = common_train_lib.get_model_combine_iters(
            num_iters, args.num_epochs,
            num_archives_expanded, args.max_models_combine,
            args.num_jobs_final)
    else:
        models_to_combine = None

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return

        current_num_jobs = int(0.5 + args.num_jobs_initial
                               + (args.num_jobs_final - args.num_jobs_initial)
                               * float(iter) / num_iters)

        if args.stage <= iter:
            lrate = common_train_lib.get_learning_rate(
                iter, current_num_jobs, num_iters, num_archives_processed,
                num_archives_to_process, args.initial_effective_lrate,
                args.final_effective_lrate)

            shrinkage_value = 1.0 - (args.proportional_shrink * lrate)
            if shrinkage_value <= 0.5:
                raise Exception(
                    "proportional-shrink={0} is too large, it gives "
                    "shrink-value={1}".format(args.proportional_shrink,
                                              shrinkage_value))

            percent = num_archives_processed * 100.0 / num_archives_to_process
            epoch = (num_archives_processed * args.num_epochs
                     / num_archives_to_process)
            shrink_info_str = ''
            if shrinkage_value != 1.0:
                shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value)
            logger.info("Iter: {0}/{1} "
                        "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) "
                        "lr: {5:0.6f} {6}".format(iter, num_iters - 1,
                                                  epoch, args.num_epochs,
                                                  percent, lrate,
                                                  shrink_info_str))

            train_lib.common.train_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                egs_dir=egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=lrate,
                dropout_edit_string=common_train_lib.get_dropout_edit_string(
                    args.dropout_schedule,
                    float(num_archives_processed) / num_archives_to_process,
                    iter),
                train_opts=' '.join(args.train_opts),
                minibatch_size_str=args.minibatch_size,
                frames_per_eg=args.frames_per_eg,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shrinkage_value=shrinkage_value,
                shuffle_buffer_size=args.shuffle_buffer_size,
                run_opts=run_opts)

            if args.cleanup:
                # clean up everything but the last 2 models, under certain
                # conditions
                common_train_lib.remove_model(args.dir, iter - 2, num_iters,
                                              models_to_combine,
                                              args.preserve_model_interval)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # let's do some reporting
                    [report, times, data] = (
                        nnet3_log_parse.generate_acc_logprob_report(args.dir))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    if args.stage <= num_iters:
        if args.do_final_combination:
            logger.info("Doing final combination to produce final.mdl")
            train_lib.common.combine_models(
                dir=args.dir, num_iters=num_iters,
                models_to_combine=models_to_combine, egs_dir=egs_dir,
                minibatch_size_str=args.minibatch_size, run_opts=run_opts,
                max_objective_evaluations=args.max_objective_evaluations)

    if args.stage <= num_iters + 1:
        logger.info("Getting average posterior for purposes of "
                    "adjusting the priors.")

        # If args.do_final_combination is true, we will use the combined
        # model. Otherwise, we will use the last-numbered model.
        real_iter = 'combined' if args.do_final_combination else num_iters
        avg_post_vec_file = train_lib.common.compute_average_posterior(
            dir=args.dir, iter=real_iter, egs_dir=egs_dir,
            num_archives=num_archives,
            prior_subset_size=args.prior_subset_size, run_opts=run_opts)

        logger.info("Re-adjusting priors based on computed posteriors")
        combined_or_last_numbered_model = "{dir}/{iter}.mdl".format(
            dir=args.dir, iter=real_iter)
        final_model = "{dir}/final.mdl".format(dir=args.dir)
        train_lib.common.adjust_am_priors(args.dir,
                                          combined_or_last_numbered_model,
                                          avg_post_vec_file, final_model,
                                          run_opts)

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = args.remove_egs

        if args.egs_dir is not None:
            # this egs_dir was not created by this experiment so we will not
            # delete it
            remove_egs = False

        common_train_lib.clean_nnet_dir(
            nnet_dir=args.dir, num_iters=num_iters,
            egs_dir=egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs)

    # do some reporting
    [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(
        args.dir)
    if args.email is not None:
        common_lib.send_mail(
            report, "Update : Expt {0} : "
                    "complete".format(args.dir), args.email)

    with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f:
        f.write(report)

    common_lib.execute_command("steps/info/nnet3_dir_info.pl "
                               "{0}".format(args.dir))
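

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): which model the
# prior-adjustment step above starts from.  The helper name and directory
# layout are hypothetical and not checked.
def _example_prior_adjust_source(exp_dir, num_iters, do_final_combination):
    real_iter = 'combined' if do_final_combination else num_iters
    # e.g. exp_dir/combined.mdl or exp_dir/380.mdl is the model whose priors
    # are re-adjusted and written out as exp_dir/final.mdl
    return "{0}/{1}.mdl".format(exp_dir, real_iter)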
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files files = ['{0}/feats.scp'.format(args.feat_dir), '{0}/fst.1.scp'.format(args.tree_dir), '{0}/final.mdl'.format(args.tree_dir), '{0}/tree'.format(args.tree_dir), '{0}/phone_lm.fst'.format(args.tree_dir), '{0}/num_jobs'.format(args.tree_dir)] for file in files: if not os.path.isfile(file): raise Exception('Expected {0} to exist.'.format(file)) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) logger.info("feat-dim: {}, ivector-dim: {}".format(feat_dim, ivector_dim)) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for compiling FSTs common_lib.execute_command("utils/split_data.sh {0} {1}".format( args.feat_dir, num_jobs)) shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) shutil.copy('{0}/phones.txt'.format(args.tree_dir), args.dir) shutil.copy('{0}/phone_lm.fst'.format(args.tree_dir), args.dir) shutil.copy('{0}/0.trans_mdl'.format(args.tree_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
    if (args.stage <= -5):
        logger.info("Creating denominator FST")
        chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts)

    if (args.stage <= -4):
        logger.info("Initializing a basic network...")
        common_lib.execute_command(
            """{command} {dir}/log/nnet_init.log \
                    nnet3-init --srand=-2 {dir}/configs/final.config \
                    {dir}/init.raw""".format(command=run_opts.command,
                                             dir=args.dir))

    egs_left_context = left_context + args.frame_subsampling_factor // 2
    egs_right_context = right_context + args.frame_subsampling_factor // 2
    egs_left_context_initial = (left_context_initial
                                + args.frame_subsampling_factor // 2
                                if left_context_initial >= 0 else -1)
    egs_right_context_final = (right_context_final
                               + args.frame_subsampling_factor // 2
                               if right_context_final >= 0 else -1)

    default_egs_dir = '{0}/egs'.format(args.dir)
    if (args.stage <= -3) and args.egs_dir is None:
        logger.info("Generating end-to-end egs...")
        common_lib.execute_command(
            """steps/nnet3/chain/e2e/get_egs_e2e.sh {egs_opts} \
                    --cmd "{command}" \
                    --cmvn-opts "{cmvn_opts}" \
                    --online-ivector-dir "{ivector_dir}" \
                    --left-context {left_context} \
                    --right-context {right_context} \
                    --left-context-initial {left_context_initial} \
                    --right-context-final {right_context_final} \
                    --frame-subsampling-factor {frame_subsampling_factor} \
                    --stage {stage} \
                    --frames-per-iter {frames_per_iter} \
                    --srand {srand} \
                    {data} {dir} {fst_dir} {egs_dir}""".format(
                command=run_opts.command,
                cmvn_opts=args.cmvn_opts if args.cmvn_opts is not None else '',
                ivector_dir=(args.online_ivector_dir
                             if args.online_ivector_dir is not None
                             else ''),
                left_context=egs_left_context,
                right_context=egs_right_context,
                left_context_initial=egs_left_context_initial,
                right_context_final=egs_right_context_final,
                frame_subsampling_factor=args.frame_subsampling_factor,
                stage=args.egs_stage,
                frames_per_iter=args.frames_per_iter,
                srand=args.srand,
                data=args.feat_dir,
                dir=args.dir,
                fst_dir=args.tree_dir,
                egs_dir=default_egs_dir,
                egs_opts=args.egs_opts if args.egs_opts is not None else ''))

    if args.egs_dir is None:
        egs_dir = default_egs_dir
    else:
        egs_dir = args.egs_dir

    [egs_left_context, egs_right_context,
     frames_per_eg_str, num_archives] = (
         common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim,
                                         ivector_id,
                                         egs_left_context, egs_right_context,
                                         egs_left_context_initial,
                                         egs_right_context_final))

    num_archives_expanded = num_archives * args.frame_subsampling_factor

    if (args.num_jobs_final > num_archives_expanded):
        raise Exception('num_jobs_final cannot exceed the '
                        'expanded number of archives')

    # copy the properties of the egs to dir for
    # use during decoding
    logger.info("Copying the properties from {0} to {1}".format(egs_dir,
                                                                args.dir))
    common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir)

    if (args.stage <= -1):
        logger.info("Preparing the initial acoustic model.")
        chain_lib.prepare_initial_acoustic_model(args.dir, run_opts)

    with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f:
        f.write(str(args.frame_subsampling_factor))

    # set num_iters so that as close as possible, we process the data
    # $num_epochs times, i.e. $num_iters*$avg_num_jobs ==
    # $num_epochs*$num_archives, where
    # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
    num_archives_to_process = int(args.num_epochs * num_archives_expanded)
    num_archives_processed = 0
    num_iters = ((num_archives_to_process * 2)
                 // (args.num_jobs_initial + args.num_jobs_final))

    models_to_combine = common_train_lib.get_model_combine_iters(
        num_iters, args.num_epochs,
        num_archives_expanded, args.max_models_combine,
        args.num_jobs_final)

    min_deriv_time = None
    max_deriv_time_relative = None
    if args.deriv_truncate_margin is not None:
        min_deriv_time = -args.deriv_truncate_margin - model_left_context
        max_deriv_time_relative = \
            args.deriv_truncate_margin + model_right_context

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        percent = num_archives_processed * 100.0 / num_archives_to_process
        epoch = (num_archives_processed * args.num_epochs
                 / num_archives_to_process)

        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return

        current_num_jobs = int(0.5 + args.num_jobs_initial
                               + (args.num_jobs_final - args.num_jobs_initial)
                               * float(iter) / num_iters)

        if args.stage <= iter:
            model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter)

            lrate = common_train_lib.get_learning_rate(
                iter, current_num_jobs, num_iters, num_archives_processed,
                num_archives_to_process, args.initial_effective_lrate,
                args.final_effective_lrate)

            shrinkage_value = 1.0 - (args.proportional_shrink * lrate)
            if shrinkage_value <= 0.5:
                raise Exception(
                    "proportional-shrink={0} is too large, it gives "
                    "shrink-value={1}".format(args.proportional_shrink,
                                              shrinkage_value))

            if args.shrink_value < shrinkage_value:
                shrinkage_value = (args.shrink_value
                                   if common_train_lib.should_do_shrinkage(
                                       iter, model_file,
                                       args.shrink_saturation_threshold)
                                   else shrinkage_value)

            shrink_info_str = ''
            if shrinkage_value != 1.0:
                shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value)
            logger.info("Iter: {0}/{1} "
                        "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) "
                        "lr: {5:0.6f} {6}".format(iter, num_iters - 1,
                                                  epoch, args.num_epochs,
                                                  percent, lrate,
                                                  shrink_info_str))

            chain_lib.train_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                egs_dir=egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=lrate,
                dropout_edit_string=common_train_lib.get_dropout_edit_string(
                    args.dropout_schedule,
                    float(num_archives_processed) / num_archives_to_process,
                    iter),
                shrinkage_value=shrinkage_value,
                num_chunk_per_minibatch_str=args.num_chunk_per_minibatch,
                apply_deriv_weights=args.apply_deriv_weights,
                min_deriv_time=min_deriv_time,
                max_deriv_time_relative=max_deriv_time_relative,
                l2_regularize=args.l2_regularize,
                xent_regularize=args.xent_regularize,
                leaky_hmm_coefficient=args.leaky_hmm_coefficient,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shuffle_buffer_size=args.shuffle_buffer_size,
                frame_subsampling_factor=args.frame_subsampling_factor,
                run_opts=run_opts)

            if args.cleanup:
                # clean up everything but the last 2 models, under certain
                # conditions
                common_train_lib.remove_model(
                    args.dir, iter-2, num_iters, models_to_combine,
                    args.preserve_model_interval)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # let's do some reporting
                    [report, times, data] = (
                        nnet3_log_parse.generate_acc_logprob_report(
                            args.dir, "log-probability"))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    if args.stage <= num_iters:
        logger.info("Doing final combination to produce final.mdl")
        chain_lib.combine_models(
            dir=args.dir, num_iters=num_iters,
            models_to_combine=models_to_combine,
            num_chunk_per_minibatch_str=args.num_chunk_per_minibatch,
            egs_dir=egs_dir,
            leaky_hmm_coefficient=args.leaky_hmm_coefficient,
            l2_regularize=args.l2_regularize,
            xent_regularize=args.xent_regularize,
            run_opts=run_opts)

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = args.remove_egs

        if args.egs_dir is not None:
            # this egs_dir was not created by this experiment so we will not
            # delete it
            remove_egs = False

        common_train_lib.clean_nnet_dir(
            args.dir, num_iters, egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs)

    # do some reporting
    [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(
        args.dir, "log-probability")
    if args.email is not None:
        common_lib.send_mail(report, "Update : Expt {0} : "
                                     "complete".format(args.dir), args.email)

    with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f:
        f.write(report)

    common_lib.execute_command("steps/info/chain_dir_info.pl "
                               "{0}".format(args.dir))
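

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): the egs left/right
# context computed above.  The extra frame_subsampling_factor // 2 frames
# allow the data to be shifted slightly during training; the helper name and
# numbers are made up for illustration.
def _example_egs_context(model_left_context=29, model_right_context=29,
                         chunk_left_context=0, chunk_right_context=0,
                         frame_subsampling_factor=3):
    left_context = chunk_left_context + model_left_context
    right_context = chunk_right_context + model_right_context
    egs_left_context = left_context + frame_subsampling_factor // 2
    egs_right_context = right_context + frame_subsampling_factor // 2
    return egs_left_context, egs_right_context   # (30, 30) in this example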
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Copy phones.txt from tree-dir to dir. Later, steps/nnet3/decode.sh will # use it to check compatibility between training and decoding phone-sets. shutil.copy('{0}/phones.txt'.format(args.tree_dir), args.dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. if (args.stage <= -6): logger.info("Creating phone language-model") chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts, lm_opts=args.lm_opts) if (args.stage <= -5): logger.info("Creating denominator FST") shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts) if ((args.stage <= -4) and os.path.exists("{0}/configs/init.config".format(args.dir)) and (args.input_model is None)): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.execute_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) egs_left_context = left_context + args.frame_subsampling_factor / 2 egs_right_context = right_context + args.frame_subsampling_factor / 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. 
egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if right_context_final >= 0 else -1) default_egs_dir = '{0}/egs'.format(args.dir) if ((args.stage <= -3) and args.egs_dir is None): logger.info("Generating egs") if (not os.path.exists("{0}/den.fst".format(args.dir)) or not os.path.exists("{0}/normalization.fst".format(args.dir)) or not os.path.exists("{0}/tree".format(args.dir))): raise Exception("Chain egs generation expects {0}/den.fst, " "{0}/normalization.fst and {0}/tree " "to exist.".format(args.dir)) # this is where get_egs.sh is called. chain_lib.generate_chain_egs( dir=args.dir, data=args.feat_dir, lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, left_context_initial=egs_left_context_initial, right_context_final=egs_right_context_final, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, frames_per_iter=args.frames_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding logger.info("Copying the properties from {0} to {1}".format(egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if ((args.stage <= -2) and (os.path.exists(args.dir+"/configs/init.config")) and (args.input_model is None)): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts, input_model=args.input_model) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. 
if args.do_final_combination: models_to_combine = common_train_lib.get_model_combine_iters( num_iters, args.num_epochs, num_archives_expanded, args.max_models_combine, args.num_jobs_final) else: models_to_combine = None min_deriv_time = None max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time_relative = \ args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter) lrate = common_train_lib.get_learning_rate(iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) shrinkage_value = 1.0 - (args.proportional_shrink * lrate) if shrinkage_value <= 0.5: raise Exception("proportional-shrink={0} is too large, it gives " "shrink-value={1}".format(args.proportional_shrink, shrinkage_value)) if args.shrink_value < shrinkage_value: shrinkage_value = (args.shrink_value if common_train_lib.should_do_shrinkage( iter, model_file, args.shrink_saturation_threshold) else shrinkage_value) percent = num_archives_processed * 100.0 / num_archives_to_process epoch = (num_archives_processed * args.num_epochs / num_archives_to_process) shrink_info_str = '' if shrinkage_value != 1.0: shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value) logger.info("Iter: {0}/{1} " "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) " "lr: {5:0.6f} {6}".format(iter, num_iters - 1, epoch, args.num_epochs, percent, lrate, shrink_info_str)) chain_lib.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=lrate, dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), train_opts=' '.join(args.train_opts), shrinkage_value=shrinkage_value, num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, max_deriv_time_relative=max_deriv_time_relative, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, run_opts=run_opts, backstitch_training_scale=args.backstitch_training_scale, backstitch_training_interval=args.backstitch_training_interval) if args.cleanup: # do a clean up everything but the last 2 models, under certain # conditions common_train_lib.remove_model( args.dir, iter-2, num_iters, models_to_combine, args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( nnet3_log_parse.generate_acc_logprob_report( args.dir, "log-probability")) message = report subject = ("Update : Expt {dir} : " "Iter 
{iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: if args.do_final_combination: logger.info("Doing final combination to produce final.mdl") chain_lib.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, egs_dir=egs_dir, leaky_hmm_coefficient=args.leaky_hmm_coefficient, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, max_objective_evaluations=args.max_objective_evaluations) else: logger.info("Copying the last-numbered model to final.mdl") common_lib.force_symlink("{0}.mdl".format(num_iters), "{0}/final.mdl".format(args.dir)) common_lib.force_symlink("compute_prob_valid.{iter}.log" "".format(iter=num_iters-1), "{dir}/log/compute_prob_valid.final.log".format( dir=args.dir)) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False # leave the last-two-numbered models, for diagnostic reasons. common_train_lib.clean_nnet_dir( args.dir, num_iters - 1, egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data] = nnet3_log_parse.generate_acc_logprob_report( args.dir, "log-probability") if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.execute_command("steps/info/chain_dir_info.pl " "{0}".format(args.dir))
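

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): how the number of
# parallel jobs used above is interpolated from num_jobs_initial to
# num_jobs_final over training.  The helper name and numbers are made up.
def _example_current_num_jobs(iter, num_iters=240,
                              num_jobs_initial=2, num_jobs_final=8):
    return int(0.5 + num_jobs_initial
               + (num_jobs_final - num_jobs_initial)
               * float(iter) / num_iters)
# e.g. _example_current_num_jobs(0) == 2, _example_current_num_jobs(120) == 5,
# _example_current_num_jobs(239) == 8.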
def train(args, run_opts, background_process_handler): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.split_data(args.feat_dir, num_jobs) shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] # this is really the number of times we add layers to the network for # discriminative pretraining num_hidden_layers = variables['num_hidden_layers'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. if (args.stage <= -6): logger.info("Creating phone language-model") chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts, lm_opts=args.lm_opts) if (args.stage <= -5): logger.info("Creating denominator FST") chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts) if (args.stage <= -4): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.run_kaldi_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) egs_left_context = left_context + args.frame_subsampling_factor/2 egs_right_context = right_context + args.frame_subsampling_factor/2 default_egs_dir = '{0}/egs'.format(args.dir) if (args.stage <= -3) and args.egs_dir is None: logger.info("Generating egs") # this is where get_egs.sh is called. 
chain_lib.generate_chain_egs( dir=args.dir, data=args.feat_dir, lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, frames_per_eg=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, frames_per_iter=args.frames_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, egs_left_context, egs_right_context)) assert(args.chunk_width == frames_per_eg) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if (args.stage <= -2): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
num_archives_to_process = args.num_epochs * num_archives_expanded num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) models_to_combine = common_train_lib.verify_iterations( num_iters, args.num_epochs, num_hidden_layers, num_archives_expanded, args.max_models_combine, args.add_layers_period, args.num_jobs_final) def learning_rate(iter, current_num_jobs, num_archives_processed): return common_train_lib.get_learning_rate(iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) min_deriv_time = None max_deriv_time = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin + model_right_context) logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter) shrinkage_value = 1.0 if args.shrink_value != 1.0: shrinkage_value = (args.shrink_value if common_train_lib.do_shrinkage( iter, model_file, args.shrink_saturation_threshold) else 1 ) logger.info("On iteration {0}, learning rate is {1} and " "shrink value is {2}.".format( iter, learning_rate(iter, current_num_jobs, num_archives_processed), shrinkage_value)) chain_lib.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), shrinkage_value=shrinkage_value, num_chunk_per_minibatch=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, right_context=right_context, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, max_deriv_time=max_deriv_time, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, truncate_deriv_weights=args.truncate_deriv_weights, run_opts=run_opts, background_process_handler=background_process_handler) if args.cleanup: # do a clean up everythin but the last 2 models, under certain # conditions common_train_lib.remove_model( args.dir, iter-2, num_iters, models_to_combine, args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( nnet3_log_parse.generate_accuracy_report( args.dir, "log-probability")) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: logger.info("Doing final combination to produce final.mdl") chain_lib.combine_models( dir=args.dir, num_iters=num_iters, 
models_to_combine=models_to_combine, num_chunk_per_minibatch=args.num_chunk_per_minibatch, egs_dir=egs_dir, left_context=left_context, right_context=right_context, leaky_hmm_coefficient=args.leaky_hmm_coefficient, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, background_process_handler=background_process_handler) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False common_train_lib.clean_nnet_dir( args.dir, num_iters, egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data] = nnet3_log_parse.generate_accuracy_report( args.dir, "log-probability") if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.run_kaldi_command("steps/info/nnet3_dir_info.pl " "{0}".format(args.dir))
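# The training loop above picks num_iters so that, with the number of parallel jobs
# ramping linearly from num_jobs_initial to num_jobs_final, the total number of
# archives processed is about num_epochs * num_archives_expanded (each iteration
# consumes one archive per job).  A self-contained check of that arithmetic follows;
# the variable names mirror the script but nothing here is the Kaldi implementation.
# (The scripts were written for Python 2, where the bare "/" on ints already did
# integer division; the explicit int() below makes the sketch Python-3 safe.)

def sketch_num_iters(num_archives_to_process, num_jobs_initial, num_jobs_final):
    # average job count over training is (initial + final) / 2, so
    # num_iters * avg_jobs ~= num_archives_to_process
    return int(num_archives_to_process * 2 / (num_jobs_initial + num_jobs_final))

def sketch_total_processed(num_archives_to_process, num_jobs_initial, num_jobs_final):
    num_iters = sketch_num_iters(num_archives_to_process,
                                 num_jobs_initial, num_jobs_final)
    total = 0
    for it in range(num_iters):
        current_num_jobs = int(0.5 + num_jobs_initial
                               + (num_jobs_final - num_jobs_initial)
                               * float(it) / num_iters)
        total += current_num_jobs
    return total

# e.g. with 3 epochs of 120 expanded archives and jobs ramping 2 -> 12:
# sketch_total_processed(360, 2, 12) returns 352, a little under 360 because
# num_iters is truncated to an integer and the per-iteration job counts are rounded.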
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Copy phones.txt from ali-dir to dir. Later, steps/nnet3/decode.sh will # use it to check compatibility between training and decoding phone-sets. shutil.copy('{0}/phones.txt'.format(args.ali_dir), args.dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}".format( args.feat_dir, num_jobs)) shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write('{}'.format(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
if (args.stage <= -5) and (args.input_model is None): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.execute_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) default_egs_dir = '{0}/egs'.format(args.dir) if args.stage <= -4 and args.egs_dir is None: logger.info("Generating egs") if args.feat_dir is None: raise Exception("--feat-dir option is required if you don't supply --egs-dir") train_lib.acoustic_model.generate_egs( data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, left_context_initial=left_context_initial, right_context_final=right_context_final, run_opts=run_opts, frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, samples_per_iter=args.samples_per_iter, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_id, left_context, right_context, left_context_initial, right_context_final)) if args.chunk_width != frames_per_eg_str: raise Exception("mismatch between --egs.chunk-width and the frames_per_eg " "in the egs dir {0} vs {1}".format(args.chunk_width, frames_per_eg_str)) if args.num_jobs_final > num_archives: raise Exception('num_jobs_final cannot exceed the number of archives ' 'in the egs directory') # copy the properties of the egs to dir for # use during decoding common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if args.stage <= -3 and (args.input_model is None): logger.info('Computing the preconditioning matrix for input features') train_lib.common.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if args.stage <= -2 and (args.input_model is None): logger.info("Computing initial vector for FixedScaleComponent before" " softmax, using priors^{prior_scale} and rescaling to" " average 1".format( prior_scale=args.presoftmax_prior_scale_power)) common_train_lib.compute_presoftmax_prior_scale( args.dir, args.ali_dir, num_jobs, run_opts, presoftmax_prior_scale_power=args.presoftmax_prior_scale_power) if args.stage <= -1: logger.info("Preparing the initial acoustic model.") train_lib.acoustic_model.prepare_initial_acoustic_model( args.dir, args.ali_dir, run_opts, input_model=args.input_model) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_to_process = int(args.num_epochs * num_archives) num_archives_processed = 0 num_iters = int((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. 
if args.do_final_combination: models_to_combine = common_train_lib.get_model_combine_iters( num_iters, args.num_epochs, num_archives, args.max_models_combine, args.num_jobs_final) else: models_to_combine = None min_deriv_time = None max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time_relative = \ args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter) lrate = common_train_lib.get_learning_rate(iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) shrinkage_value = 1.0 - (args.proportional_shrink * lrate) if shrinkage_value <= 0.5: raise Exception("proportional-shrink={0} is too large, it gives " "shrink-value={1}".format(args.proportional_shrink, shrinkage_value)) if args.shrink_value < shrinkage_value: shrinkage_value = (args.shrink_value if common_train_lib.should_do_shrinkage( iter, model_file, args.shrink_saturation_threshold) else 1.0) percent = num_archives_processed * 100.0 / num_archives_to_process epoch = (num_archives_processed * args.num_epochs / num_archives_to_process) shrink_info_str = '' if shrinkage_value != 1.0: shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value) logger.info("Iter: {0}/{1} " "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) " "lr: {5:0.6f} {6}".format(iter, num_iters - 1, epoch, args.num_epochs, percent, lrate, shrink_info_str)) train_lib.common.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=lrate, dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), train_opts=' '.join(args.train_opts), shrinkage_value=shrinkage_value, minibatch_size_str=args.num_chunk_per_minibatch, min_deriv_time=min_deriv_time, max_deriv_time_relative=max_deriv_time_relative, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, run_opts=run_opts, backstitch_training_scale=args.backstitch_training_scale, backstitch_training_interval=args.backstitch_training_interval, compute_per_dim_accuracy=args.compute_per_dim_accuracy) if args.cleanup: # do a clean up everythin but the last 2 models, under certain # conditions common_train_lib.remove_model( args.dir, iter-2, num_iters, models_to_combine, args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: if args.do_final_combination: 
logger.info("Doing final combination to produce final.mdl") train_lib.common.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, egs_dir=egs_dir, run_opts=run_opts, minibatch_size_str=args.num_chunk_per_minibatch, chunk_width=args.chunk_width, max_objective_evaluations=args.max_objective_evaluations, compute_per_dim_accuracy=args.compute_per_dim_accuracy) if args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " "adjusting the priors.") # If args.do_final_combination is true, we will use the combined model. # Otherwise, we will use the last_numbered model. real_iter = 'combined' if args.do_final_combination else num_iters avg_post_vec_file = train_lib.common.compute_average_posterior( dir=args.dir, iter=real_iter, egs_dir=egs_dir, num_archives=num_archives, prior_subset_size=args.prior_subset_size, run_opts=run_opts) logger.info("Re-adjusting priors based on computed posteriors") combined_or_last_numbered_model = "{dir}/{iter}.mdl".format(dir=args.dir, iter=real_iter) final_model = "{dir}/final.mdl".format(dir=args.dir) train_lib.common.adjust_am_priors(args.dir, combined_or_last_numbered_model, avg_post_vec_file, final_model, run_opts) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False common_train_lib.clean_nnet_dir( nnet_dir=args.dir, num_iters=num_iters, egs_dir=egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.execute_command("steps/info/nnet3_dir_info.pl " "{0}".format(args.dir))
def model_init(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) egs_left_context = left_context + args.frame_subsampling_factor / 2 egs_right_context = right_context + args.frame_subsampling_factor / 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if right_context_final >= 0 else -1) default_egs_dir = '{0}/egs'.format(args.dir) num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. 
try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding logger.info("Copying the properties from {0} to {1}".format(egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if ((args.stage <= -2) and (os.path.exists(args.dir+"/configs/init.config")) and (args.input_model is None)): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts, input_model=args.input_model) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. if args.do_final_combination: models_to_combine = common_train_lib.get_model_combine_iters( num_iters, args.num_epochs, num_archives_expanded, args.max_models_combine, args.num_jobs_final) else: models_to_combine = None min_deriv_time = None max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time_relative = \ args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters))
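# The deriv-truncate logic just above limits how far towards the chunk edges the
# backpropagated derivatives are used, which mainly matters for recurrent models.
# The helper below simply mirrors the two expressions from the script so the sign
# conventions are easy to see; it is an illustrative sketch, not a library function.

def sketch_deriv_times(deriv_truncate_margin, model_left_context, model_right_context):
    if deriv_truncate_margin is None:
        return None, None
    min_deriv_time = -deriv_truncate_margin - model_left_context
    max_deriv_time_relative = deriv_truncate_margin + model_right_context
    return min_deriv_time, max_deriv_time_relative

# e.g. a margin of 8 with model context (16, 12) gives (-24, 20)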
def one_iter(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) egs_left_context = left_context + args.frame_subsampling_factor / 2 egs_right_context = right_context + args.frame_subsampling_factor / 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. 
egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if right_context_final >= 0 else -1) default_egs_dir = '{0}/egs'.format(args.dir) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = (common_train_lib.verify_egs_dir( egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) assert (args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding logger.info("Copying the properties from {0} to {1}".format( egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if ((args.stage <= -2) and (os.path.exists(args.dir + "/configs/init.config")) and (args.input_model is None)): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts, input_model=args.input_model) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. 
if args.do_final_combination: models_to_combine = common_train_lib.get_model_combine_iters( num_iters, args.num_epochs, num_archives_expanded, args.max_models_combine, args.num_jobs_final) else: models_to_combine = None min_deriv_time = None max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time_relative = \ args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) if (args.exit_stage is not None) and (args.iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(args.iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(args.iter) / num_iters) model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=args.iter) lrate = common_train_lib.get_learning_rate(args.iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) shrinkage_value = 1.0 - (args.proportional_shrink * lrate) if shrinkage_value <= 0.5: raise Exception("proportional-shrink={0} is too large, it gives " "shrink-value={1}".format(args.proportional_shrink, shrinkage_value)) if args.shrink_value < shrinkage_value: shrinkage_value = ( args.shrink_value if common_train_lib.should_do_shrinkage( args.iter, model_file, args.shrink_saturation_threshold) else shrinkage_value) percent = num_archives_processed * 100.0 / num_archives_to_process epoch = (num_archives_processed * args.num_epochs / num_archives_to_process) shrink_info_str = '' if shrinkage_value != 1.0: shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value) logger.info("Iter: {0}/{1} " "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) " "lr: {5:0.6f} {6}".format(args.iter, num_iters - 1, epoch, args.num_epochs, percent, lrate, shrink_info_str)) chain_lib.train_one_iteration( dir=args.dir, iter=args.iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=lrate, dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, args.iter), shrinkage_value=shrinkage_value, num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, max_deriv_time_relative=max_deriv_time_relative, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, run_opts=run_opts, backstitch_training_scale=args.backstitch_training_scale, backstitch_training_interval=args.backstitch_training_interval) num_archives_processed = num_archives_processed + current_num_jobs # do some reporting [report, times, data ] = nnet3_log_parse.generate_acc_logprob_report(args.dir, "log-probability") if args.email is not None: common_lib.send_mail( report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.execute_command("steps/info/nnet3_dir_info.pl " "{0}".format(args.dir))
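# Two shrinkage mechanisms interact in the iteration above: a per-iteration value
# derived from --proportional-shrink and the learning rate, and the fixed
# --shrink-value that is only applied when the model's sigmoid/tanh units look
# saturated (should_do_shrinkage).  A compact sketch of that decision, with the
# saturation test stubbed out as a boolean argument (illustrative only):

def sketch_shrinkage(lrate, proportional_shrink, shrink_value, saturated):
    shrinkage_value = 1.0 - proportional_shrink * lrate
    if shrinkage_value <= 0.5:
        raise ValueError("proportional-shrink={0} is too large for lrate={1}"
                         .format(proportional_shrink, lrate))
    if shrink_value < shrinkage_value and saturated:
        shrinkage_value = shrink_value
    return shrinkage_value

# e.g. sketch_shrinkage(0.0005, 10.0, 0.99, saturated=True)  -> 0.99
#      sketch_shrinkage(0.0005, 10.0, 0.99, saturated=False) -> 0.995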
def train(args, run_opts): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Set some variables. # num_leaves = common_lib.get_number_of_leaves_from_tree(args.ali_dir) num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment # 将特征数据 按照job数量进行划分, 会使用相同数量的job 进行对齐. # feat_dir -- data/train_sp_hires 划分为jos 个子集. common_lib.execute_command("utils/split_data.sh {0} {1}".format( args.feat_dir, num_jobs)) # copy tri5a_sp_ali/tree(最后训练好的模型tree) ---> exp/nnet3/tdnn_sp shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) # 将ali_dir(exp/tri5a_sp_ali/num_jobs) 读取写入 目标目录 exp/nnet3/tdnn_sp/num_jos with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) # 目标目录中 存入 nnet3结构/ 以及nnet3的 left right context配置信息 config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # liujunnan@innovem:configs$ cat vars # model_left_context=16 # model_right_context=12 # liujunnan@innovem:configs$ # Set some variables. model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] left_context = model_left_context right_context = model_right_context # 初始化 原始nnet结构, 在训练 LDA预处理矩阵之前 # 第一个配置 只做了一些初始化拼接操作 # 这样做,是因为这样很方变能够获得LDA变换矩阵的统计信息.?????? if (args.stage <= -5) and os.path.exists(args.dir+"/configs/init.config"): logger.info("Initializing a basic network for estimating preconditioning matrix") # nnet3-init 利用 exp/nnet3/tdnn_sp/configs/init.config ===> exp/nnet3/tdnn_sp/init.raw. common_lib.execute_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir))
def train(args, run_opts, background_process_handler): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.split_data(args.feat_dir, num_jobs) shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] # this is really the number of times we add layers to the network for # discriminative pretraining num_hidden_layers = variables['num_hidden_layers'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. if (args.stage <= -6): logger.info("Creating phone language-model") chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts, lm_opts=args.lm_opts) if (args.stage <= -5): logger.info("Creating denominator FST") chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts) if (args.stage <= -4): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.run_kaldi_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) egs_left_context = left_context + args.frame_subsampling_factor/2 egs_right_context = right_context + args.frame_subsampling_factor/2 default_egs_dir = '{0}/egs'.format(args.dir) if (args.stage <= -3) and args.egs_dir is None: logger.info("Generating egs") # this is where get_egs.sh is called. 
chain_lib.generate_chain_egs( dir=args.dir, data=args.feat_dir, lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, frames_per_eg=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, frames_per_iter=args.frames_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, egs_left_context, egs_right_context)) assert(args.chunk_width == frames_per_eg) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): raise Exception('num_jobs_final cannot exceed the ' 'expanded number of archives') # copy the properties of the egs to dir for # use during decoding common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if (args.stage <= -2): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") chain_lib.prepare_initial_acoustic_model(args.dir, run_opts) with open("{0}/frame_subsampling_factor".format(args.dir), "w") as f: f.write(str(args.frame_subsampling_factor)) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
num_archives_to_process = args.num_epochs * num_archives_expanded num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) models_to_combine = common_train_lib.verify_iterations( num_iters, args.num_epochs, num_hidden_layers, num_archives_expanded, args.max_models_combine, args.add_layers_period, args.num_jobs_final) def learning_rate(iter, current_num_jobs, num_archives_processed): return common_train_lib.get_learning_rate(iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) min_deriv_time = None max_deriv_time = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin + model_right_context) logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: model_file = "{dir}/{iter}.mdl".format(dir=args.dir, iter=iter) shrinkage_value = 1.0 if args.shrink_value != 1.0: shrinkage_value = (args.shrink_value if common_train_lib.do_shrinkage( iter, model_file, args.shrink_saturation_threshold) else 1 ) chain_lib.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), dropout_edit_string=common_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), shrinkage_value=shrinkage_value, num_chunk_per_minibatch=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, right_context=right_context, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, max_deriv_time=max_deriv_time, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, truncate_deriv_weights=args.truncate_deriv_weights, run_opts=run_opts, background_process_handler=background_process_handler) if args.cleanup: # do a clean up everythin but the last 2 models, under certain # conditions common_train_lib.remove_model( args.dir, iter-2, num_iters, models_to_combine, args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( nnet3_log_parse.generate_accuracy_report( args.dir, "log-probability")) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: logger.info("Doing final combination to produce final.mdl") chain_lib.combine_models( dir=args.dir, num_iters=num_iters, 
models_to_combine=models_to_combine, num_chunk_per_minibatch=args.num_chunk_per_minibatch, egs_dir=egs_dir, left_context=left_context, right_context=right_context, leaky_hmm_coefficient=args.leaky_hmm_coefficient, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, background_process_handler=background_process_handler) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False common_train_lib.clean_nnet_dir( args.dir, num_iters, egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data] = nnet3_log_parse.generate_accuracy_report( args.dir, "log-probability") if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.run_kaldi_command("steps/info/nnet3_dir_info.pl " "{0}".format(args.dir))
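# get_learning_rate (wrapped by the learning_rate() closure above) interpolates
# between the initial and final "effective" learning rates as a function of how
# much data has been processed, and scales by the current number of jobs so the
# effective per-sample rate matches the options.  The sketch below shows a
# plausible exponential interpolation of that kind; the exact formula lives in
# common_train_lib and may differ in detail.
import math

def sketch_learning_rate(current_num_jobs, num_archives_processed,
                         num_archives_to_process,
                         initial_effective_lrate, final_effective_lrate):
    frac = float(num_archives_processed) / num_archives_to_process
    effective_lrate = (initial_effective_lrate
                       * math.exp(frac * math.log(final_effective_lrate
                                                  / initial_effective_lrate)))
    return current_num_jobs * effective_lrate

# e.g. halfway through training with 6 jobs and a 0.0017 -> 0.00017 schedule:
# sketch_learning_rate(6, 50, 100, 0.0017, 0.00017) ~= 6 * 0.00054 ~= 0.0032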
def egs_prepare(args, run_opts): """ The main function for egs_generate. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. if (args.stage <= -6): logger.info("Creating phone language-model") chain_lib.create_phone_lm(args.dir, args.tree_dir, run_opts, lm_opts=args.lm_opts) if (args.stage <= -5): logger.info("Creating denominator FST") shutil.copy('{0}/tree'.format(args.tree_dir), args.dir) chain_lib.create_denominator_fst(args.dir, args.tree_dir, run_opts) if ((args.stage <= -4) and os.path.exists("{0}/configs/init.config".format(args.dir)) and (args.input_model is None)): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.execute_command( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) egs_left_context = left_context + args.frame_subsampling_factor / 2 egs_right_context = right_context + args.frame_subsampling_factor / 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if right_context_final >= 0 else -1)
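# The egs contexts computed at the end of egs_prepare() above pad the model/chunk
# context by frame_subsampling_factor/2 frames, because the data is shifted by up
# to that many frames during training to add variety; the *_initial / *_final
# variants keep the -1 "unset" sentinel.  Note that under Python 3 the bare "/ 2"
# in the script would produce a float, so this sketch uses explicit integer
# division (illustrative helper, not library code):

def sketch_egs_contexts(left_context, right_context,
                        left_context_initial, right_context_final,
                        frame_subsampling_factor):
    pad = frame_subsampling_factor // 2
    egs_left = left_context + pad
    egs_right = right_context + pad
    egs_left_initial = left_context_initial + pad if left_context_initial >= 0 else -1
    egs_right_final = right_context_final + pad if right_context_final >= 0 else -1
    return egs_left, egs_right, egs_left_initial, egs_right_final

# e.g. sketch_egs_contexts(29, 29, -1, -1, 3) -> (30, 30, -1, -1)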
def train(args, run_opts, background_process_handler): """ The main function for training. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Set some variables. # num_leaves = common_lib.get_number_of_leaves_from_tree(args.ali_dir) num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.split_data(args.feat_dir, num_jobs) shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] # this is really the number of times we add layers to the network for # discriminative pretraining num_hidden_layers = variables['num_hidden_layers'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = model_left_context right_context = model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
if (args.stage <= -5): logger.info("Initializing a basic network for estimating " "preconditioning matrix") common_lib.run_job( """{command} {dir}/log/nnet_init.log \ nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) default_egs_dir = '{0}/egs'.format(args.dir) if (args.stage <= -4) and args.egs_dir is None: logger.info("Generating egs") train_lib.acoustic_model.generate_egs( data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, frames_per_eg_str=str(args.frames_per_eg), srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, samples_per_iter=args.samples_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_id, left_context, right_context)) assert(str(args.frames_per_eg) == frames_per_eg_str) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' 'in the egs directory') # copy the properties of the egs to dir for # use during decoding common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if (args.stage <= -3): logger.info('Computing the preconditioning matrix for input features') train_lib.common.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, rand_prune=args.rand_prune) if (args.stage <= -2): logger.info("Computing initial vector for FixedScaleComponent before" " softmax, using priors^{prior_scale} and rescaling to" " average 1".format( prior_scale=args.presoftmax_prior_scale_power)) common_train_lib.compute_presoftmax_prior_scale( args.dir, args.ali_dir, num_jobs, run_opts, presoftmax_prior_scale_power=args.presoftmax_prior_scale_power) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") train_lib.acoustic_model.prepare_initial_acoustic_model( args.dir, args.ali_dir, run_opts) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
num_archives_expanded = num_archives * args.frames_per_eg num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) models_to_combine = common_train_lib.verify_iterations( num_iters, args.num_epochs, num_hidden_layers, num_archives_expanded, args.max_models_combine, args.add_layers_period, args.num_jobs_final) def learning_rate(iter, current_num_jobs, num_archives_processed): return common_train_lib.get_learning_rate(iter, current_num_jobs, num_iters, num_archives_processed, num_archives_to_process, args.initial_effective_lrate, args.final_effective_lrate) logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) for iter in range(num_iters): if (args.exit_stage is not None) and (iter == args.exit_stage): logger.info("Exiting early due to --exit-stage {0}".format(iter)) return current_num_jobs = int(0.5 + args.num_jobs_initial + (args.num_jobs_final - args.num_jobs_initial) * float(iter) / num_iters) if args.stage <= iter: train_lib.common.train_one_iteration( dir=args.dir, iter=iter, srand=args.srand, egs_dir=egs_dir, num_jobs=current_num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), minibatch_size_str=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, right_context=right_context, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, run_opts=run_opts, background_process_handler=background_process_handler) if args.cleanup: # do a clean up everythin but the last 2 models, under certain # conditions common_train_lib.remove_model( args.dir, iter-2, num_iters, models_to_combine, args.preserve_model_interval) if args.email is not None: reporting_iter_interval = num_iters * args.reporting_interval if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) common_lib.send_mail(message, subject, args.email) num_archives_processed = num_archives_processed + current_num_jobs if args.stage <= num_iters: logger.info("Doing final combination to produce final.mdl") train_lib.common.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, egs_dir=egs_dir, left_context=left_context, right_context=right_context, minibatch_size_str=args.minibatch_size, run_opts=run_opts, background_process_handler=background_process_handler, sum_to_one_penalty=args.combine_sum_to_one_penalty) if args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " "adjusting the priors.") avg_post_vec_file = train_lib.common.compute_average_posterior( dir=args.dir, iter='combined', egs_dir=egs_dir, num_archives=num_archives, left_context=left_context, right_context=right_context, prior_subset_size=args.prior_subset_size, run_opts=run_opts) logger.info("Re-adjusting priors based on computed posteriors") combined_model = "{dir}/combined.mdl".format(dir=args.dir) final_model = 
"{dir}/final.mdl".format(dir=args.dir) train_lib.common.adjust_am_priors(args.dir, combined_model, avg_post_vec_file, final_model, run_opts) if args.cleanup: logger.info("Cleaning up the experiment directory " "{0}".format(args.dir)) remove_egs = args.remove_egs if args.egs_dir is not None: # this egs_dir was not created by this experiment so we will not # delete it remove_egs = False common_train_lib.clean_nnet_dir( nnet_dir=args.dir, num_iters=num_iters, egs_dir=egs_dir, preserve_model_interval=args.preserve_model_interval, remove_egs=remove_egs) # do some reporting [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) with open("{dir}/accuracy.report".format(dir=args.dir), "w") as f: f.write(report) common_lib.run_job("steps/info/nnet3_dir_info.pl " "{0}".format(args.dir))
def egs_generate(args, run_opts): """ The main function for egs_generate. Args: args: a Namespace object with the required parameters obtained from the function process_args() run_opts: RunOpts object obtained from the process_args() """ arg_string = pprint.pformat(vars(args)) logger.info("Arguments for the experiment\n{0}".format(arg_string)) # Check files chain_lib.check_for_required_files(args.feat_dir, args.tree_dir, args.lat_dir) # Set some variables. num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment common_lib.execute_command("utils/split_data.sh {0} {1}" "".format(args.feat_dir, num_jobs)) with open('{0}/num_jobs'.format(args.dir), 'w') as f: f.write(str(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) variables = common_train_lib.parse_generic_config_vars_file(var_file) else: # If args.input_model is specified, the model left and right contexts # are computed using input_model. variables = common_train_lib.get_input_model_info(args.input_model) # Set some variables. try: model_left_context = variables['model_left_context'] model_right_context = variables['model_right_context'] except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context left_context_initial = (args.chunk_left_context_initial + model_left_context if args.chunk_left_context_initial >= 0 else -1) right_context_final = (args.chunk_right_context_final + model_right_context if args.chunk_right_context_final >= 0 else -1) egs_left_context = left_context + args.frame_subsampling_factor / 2 egs_right_context = right_context + args.frame_subsampling_factor / 2 # note: the '+ args.frame_subsampling_factor / 2' is to allow for the # fact that we'll be shifting the data slightly during training to give # variety to the training data. egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if left_context_initial >= 0 else -1) egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if right_context_final >= 0 else -1) default_egs_dir = '{0}/egs_{1}'.format(args.dir, args.suffix) if ((args.stage <= -3) and args.egs_dir is None): logger.info("Generating egs") if (not os.path.exists("{0}/den.fst".format(args.dir)) or not os.path.exists("{0}/normalization.fst".format(args.dir)) or not os.path.exists("{0}/tree".format(args.dir))): raise Exception("Chain egs generation expects {0}/den.fst, " "{0}/normalization.fst and {0}/tree " "to exist.".format(args.dir)) # this is where get_egs.sh is called. 
chain_lib.generate_chain_egs( dir=args.dir, data=args.feat_dir, lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, left_context_initial=egs_left_context_initial, right_context_final=egs_right_context_final, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, online_ivector_dir=args.online_ivector_dir, frames_per_iter=args.frames_per_iter, transform_dir=args.transform_dir, stage=args.egs_stage) if args.egs_dir is None: egs_dir = default_egs_dir else: egs_dir = args.egs_dir [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor
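# After the egs are generated (or an existing egs dir is supplied), the script
# re-reads the context / chunk-width / archive-count metadata from the egs dir and
# checks it against the current configuration; with frame subsampling the archive
# count is multiplied by frame_subsampling_factor because each archive is reused
# with several frame shifts.  A small sketch of those sanity checks (illustrative
# only, not verify_egs_dir itself):

def sketch_check_egs(chunk_width, frames_per_eg_str,
                     num_jobs_final, num_archives, frame_subsampling_factor):
    if chunk_width != frames_per_eg_str:
        raise ValueError("mismatch between --egs.chunk-width ({0}) and the egs dir "
                         "frames_per_eg ({1})".format(chunk_width, frames_per_eg_str))
    num_archives_expanded = num_archives * frame_subsampling_factor
    if num_jobs_final > num_archives_expanded:
        raise ValueError("num_jobs_final cannot exceed the expanded number "
                         "of archives ({0})".format(num_archives_expanded))
    return num_archives_expanded

# e.g. sketch_check_egs("140,110,160", "140,110,160", 12, 50, 3) -> 150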