def saveModel(dnn, cfg):
    log("> Start saveModel")
    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the best PDNN model param so far is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the best PDNN model config so far is ' + cfg.cfg_output_file)
    # output the model into Kaldi-compatible format
    if cfg.kaldi_output_file != '':
        dnn.write_model_to_kaldi(cfg.kaldi_output_file)
        log('> ... the best Kaldi model so far is ' + cfg.kaldi_output_file)
    log("< End saveModel")
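# A minimal usage sketch (not part of the original scripts) of how saveModel might be
# called from a finetuning loop, saving whenever the validation error improves.
# 'best_valid_error' is a hypothetical variable introduced here for illustration;
# train_fn, valid_fn, dnn and cfg are assumed to come from the surrounding script.
best_valid_error = float('inf')
while (cfg.lrate.get_rate() != 0):
    train_error = train_sgd(train_fn, cfg)
    log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
    valid_error = 100 * numpy.mean(validate_by_minibatch(valid_fn, cfg))
    if valid_error < best_valid_error:
        best_valid_error = valid_error
        saveModel(dnn, cfg)
    cfg.lrate.get_next_rate(current_error=valid_error)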
        for batch_index in xrange(cfg.train_sets.cur_frame_num / cfg.batch_size):  # loop over mini-batches
            [reconstruction_cost, free_energy_cost] = pretraining_fns[i](index=batch_index,
                                                                         lr=pretrain_lr,
                                                                         momentum=momentum)
            r_c.append(reconstruction_cost)
            fe_c.append(free_energy_cost)
        cfg.train_sets.initialize_read()
        log('> pre-training layer %i, epoch %d, r_cost %f, fe_cost %f' % (i, epoch, numpy.mean(r_c), numpy.mean(fe_c)))
        # output nnet parameters and training state, for training resume
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        save_two_integers((i, epoch + 1), wdir + '/training_state.tmp')
    start_epoch_index = 0
    save_two_integers((i + 1, 0), wdir + '/training_state.tmp')

# save the pretrained nnet to file
# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)
# finally remove the training-resuming files
os.remove(wdir + '/nnet.tmp')
os.remove(wdir + '/training_state.tmp')
log('> ... learning the adaptation network')
cfg = cfg_adapt
while (cfg.lrate.get_rate() != 0):
    # one epoch of sgd training
    # train_error = train_sgd_verbose(train_fn, cfg_si.train_sets, cfg_si.train_xy,
    #                                 cfg.batch_size, cfg.lrate.get_rate(), cfg.momentum)
    # log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100*numpy.mean(train_error)) + '(%)')
    # validation
    valid_error = validate_by_minibatch_verbose(valid_fn, cfg_si.valid_sets, cfg_si.valid_xy, cfg.batch_size)
    log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100*numpy.mean(valid_error)) + '(%)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    cfg.lrate.rate = 0

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.dnn_adapt.layers, filename=cfg.param_output_file + '.adapt',
               input_factor=cfg_adapt.input_dropout_factor, factor=cfg_adapt.dropout_factor)
    _nnet2file(dnn.cnn_si.layers, filename=cfg.param_output_file + '.si',
               input_factor=cfg_si.input_dropout_factor, factor=cfg_si.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + ' (.si, .adapt)')
if cfg.cfg_output_file != '':
    _cfg2file(cfg_adapt, filename=cfg.cfg_output_file + '.adapt')
    _cfg2file(cfg_si, filename=cfg.cfg_output_file + '.si')
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file + ' (.si, .adapt)')
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.cnn_si.fc_dnn.write_model_to_kaldi(cfg.kaldi_output_file + '.si')
    dnn.dnn_adapt.write_model_to_kaldi(cfg.kaldi_output_file + '.adapt', with_softmax=False)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file + ' (.si, .adapt)')
    cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# compute the ratio of the number of model parameters to the number of training examples
for i in range(len(cfg.hidden_layers_sizes)):
    if i == 0:
        n_params = (cfg.n_ins + 1) * cfg.hidden_layers_sizes[i]
    else:
        n_params += (cfg.hidden_layers_sizes[i-1] + 1) * cfg.hidden_layers_sizes[i]
n_params += cfg.n_outs * (cfg.hidden_layers_sizes[-1] + 1)  # output layer connects from the last hidden layer
ratio = float(n_params) / float(n_data_train)
log('-->> Ratio Parameters / Data : ' + str(ratio))

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)
# remove the tmp files (which have been generated from resuming training)
os.remove(wdir + '/nnet.tmp')
os.remove(wdir + '/training_state.tmp')
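# A small worked check of the parameter count above, using hypothetical sizes
# (not taken from the original script): n_ins = 250, hidden_layers_sizes = [1024, 1024],
# n_outs = 1943 gives
#     (250 + 1) * 1024  +  (1024 + 1) * 1024  +  1943 * (1024 + 1)
#   = 257024 + 1049600 + 1991575 = 3298199 parameters (weights plus biases),
# and ratio = 3298199 / n_data_train.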
        for batch_index in xrange(cfg.train_sets.cur_frame_num / cfg.batch_size):  # loop over mini-batches
            [reconstruction_cost, free_energy_cost] = pretraining_fns[i](index=batch_index,
                                                                         lr=pretrain_lr,
                                                                         momentum=momentum)
            r_c.append(reconstruction_cost)
            fe_c.append(free_energy_cost)
        cfg.train_sets.initialize_read()
        log('> pre-training layer %i, epoch %d, r_cost %f, fe_cost %f' % (i, epoch, numpy.mean(r_c), numpy.mean(fe_c)))
        # output nnet parameters and training state, for training resume
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        save_two_integers((i, epoch + 1), wdir + '/training_state.tmp')
    start_epoch_index = 0
    save_two_integers((i + 1, 0), wdir + '/training_state.tmp')

# save the pretrained nnet to file
# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(srbm.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)
# finally remove the training-resuming files
os.remove(wdir + '/nnet.tmp')
os.remove(wdir + '/training_state.tmp')
        else:
            log('> task %d, epoch %d, training error %f ' % (n, cfg.lrate.epoch, numpy.mean(train_error_array[n])) + '(%)')
            train_error_array[n] = []
            # perform validation, output the validation error rate, and adjust the learning rate based on the validation error
            valid_error = validate_by_minibatch(valid_fn_array[n], cfg)
            log('> task %d, epoch %d, lrate %f, validation error %f ' % (n, cfg.lrate.epoch, cfg.lrate.get_rate(), numpy.mean(valid_error)) + '(%)')
            cfg.lrate.get_next_rate(current_error=numpy.mean(valid_error))
            # output nnet parameters and lrate, for training resume
            _nnet2file(dnn_array[n].layers, filename=wdir + '/nnet.tmp.task' + str(n))
            _lrate2file(cfg.lrate, wdir + '/training_state.tmp.task' + str(n))
            # if the lrate of a task decays to 0, training on this task terminates;
            # it will be excluded from future training
            if cfg.lrate.get_rate() == 0:
                active_tasks_new.remove(n)
                # save the model and network configuration
                if cfg.param_output_file != '':
                    _nnet2file(dnn_array[n].layers, filename=cfg.param_output_file + '.task' + str(n),
                               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
                    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + '.task' + str(n))
                if cfg.cfg_output_file != '':
                    _cfg2file(dnn_array[n].cfg, filename=cfg.cfg_output_file + '.task' + str(n))
                    log('> ... the final PDNN model config is ' + cfg.cfg_output_file + '.task' + str(n))
                # output the model into Kaldi-compatible format
                if cfg.kaldi_output_file != '':
                    dnn_array[n].write_model_to_kaldi(cfg.kaldi_output_file + '.task' + str(n))
                    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file + '.task' + str(n))
                # remove the tmp files (which have been generated from resuming training)
                os.remove(wdir + '/nnet.tmp.task' + str(n))
                os.remove(wdir + '/training_state.tmp.task' + str(n))
    active_tasks = active_tasks_new
    # output nnet parameters and lrate, for training resume
    if cfg.lrate.epoch % cfg.model_save_step == 0:
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.dnn.layers, filename=cfg.param_output_file,
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    _nnet2file(dnn.dnn_tower1.layers, filename=cfg.param_output_file + '.tower1',
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    _nnet2file(dnn.dnn_tower2.layers, filename=cfg.param_output_file + '.tower2',
               input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file + '(, .tower1, .tower2)')
if cfg.cfg_output_file != '':
    _cfg2file(cfg, filename=cfg.cfg_output_file)
    _cfg2file(cfg_tower1, filename=cfg.cfg_output_file + '.tower1')
    _cfg2file(cfg_tower2, filename=cfg.cfg_output_file + '.tower2')
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file + '(, .tower1, .tower2)')
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    dnn.dnn_tower1.write_model_to_kaldi(cfg.kaldi_output_file + '.tower1', with_softmax=False)
    dnn.dnn_tower2.write_model_to_kaldi(cfg.kaldi_output_file + '.tower2', with_softmax=False)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file + '(, .tower1, .tower2)')
# remove the tmp files (which have been generated from resuming training)
os.remove(wdir + '/nnet.tmp')
os.remove(wdir + '/training_state.tmp')
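# A hedged sketch (not part of the original script) of how the saved tower
# parameters could be reloaded later with the same helper used above;
# 'dnn_new' is a hypothetical, freshly built model with the same architecture.
_file2nnet(dnn_new.dnn.layers, filename=cfg.param_output_file)
_file2nnet(dnn_new.dnn_tower1.layers, filename=cfg.param_output_file + '.tower1')
_file2nnet(dnn_new.dnn_tower2.layers, filename=cfg.param_output_file + '.tower2')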
+ "(%)" ) cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error)) cfg.lrate.rate = 0 # save the model and network configuration if cfg.param_output_file != "": _nnet2file( dnn.dnn_adapt.layers, filename=cfg.param_output_file + ".adapt", input_factor=cfg_adapt.input_dropout_factor, factor=cfg_adapt.dropout_factor, ) _nnet2file( dnn.cnn_si.layers, filename=cfg.param_output_file + ".si", input_factor=cfg_si.input_dropout_factor, factor=cfg_si.dropout_factor, ) log("> ... the final PDNN model parameter is " + cfg.param_output_file + " (.si, .adapt)") if cfg.cfg_output_file != "": _cfg2file(cfg_adapt, filename=cfg.cfg_output_file + ".adapt") _cfg2file(cfg_si, filename=cfg.cfg_output_file + ".si") log("> ... the final PDNN model config is " + cfg.cfg_output_file + " (.si, .adapt)") # output the model into Kaldi-compatible format if cfg.kaldi_output_file != "": dnn.cnn_si.fc_dnn.write_model_to_kaldi(cfg.kaldi_output_file + ".si") dnn.dnn_adapt.write_model_to_kaldi(cfg.kaldi_output_file + ".adapt", with_softmax=False) log("> ... the final Kaldi model is " + cfg.kaldi_output_file + " (.si, .adapt)")
def dnn_run(arguments):
    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        if arguments.has_key(arg) == False:
            print "Error: the argument %s has to be specified" % (arg)
            exit(1)

    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']

    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training file and layer number (how many layers are initialized with the pre-trained parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'):
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    resume_training = False
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        resume_training = True
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if not resuming training, initialize from the specified pre-training file
    # if resuming training, initialize from the tmp model file
    if (ptr_layer_number > 0) and (resume_training is False):
        _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)
    if resume_training:
        _file2nnet(dnn.layers, filename=wdir + '/nnet.tmp')

    # get the training and validation functions for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y), batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while (cfg.lrate.get_rate() != 0):
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)) + '(%)')
        cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
            _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
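# A hedged example of calling dnn_run (not part of the original script): only the
# argument keys are taken from the function above; the file names, layer spec and
# values below are made up for illustration and may not match a real setup.
example_arguments = {'train_data': 'train.pickle.gz',
                     'valid_data': 'valid.pickle.gz',
                     'nnet_spec': '250:1024:1024:1024:1943',
                     'wdir': './wdir',
                     'ptr_file': './wdir/pretrained.param',
                     'ptr_layer_number': '3'}
dnn_run(example_arguments)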
            c.append(pretraining_fns[i](index=batch_index,
                                        corruption=cfg.corruption_levels[i],
                                        lr=cfg.learning_rates[i],
                                        momentum=cfg.momentum))
        cfg.train_sets.initialize_read()
        log('> layer %i, epoch %d, reconstruction cost %f' % (i, epoch, numpy.mean(c)))
        # output nnet parameters and training state, for training resume
        _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
        save_two_integers((i, epoch + 1), wdir + '/training_state.tmp')
    start_epoch_index = 0
    save_two_integers((i + 1, 0), wdir + '/training_state.tmp')

# save the pretrained nnet to file
# save the model and network configuration
if cfg.param_output_file != '':
    _nnet2file(dnn.layers, filename=cfg.param_output_file)
    log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
if cfg.cfg_output_file != '':
    _cfg2file(sda.cfg, filename=cfg.cfg_output_file)
    log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
# output the model into Kaldi-compatible format
if cfg.kaldi_output_file != '':
    dnn.write_model_to_kaldi(cfg.kaldi_output_file)
    log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)
# finally remove the training-resuming files
os.remove(wdir + '/nnet.tmp')
os.remove(wdir + '/training_state.tmp')