def init_srbm(rbm_cfg, numpy_rng=None):
    """Build an SRBM from an RBM config, backed by a parallel DNN.

    Args:
        rbm_cfg: RBM configuration; its ``n_ins``, ``hidden_layers_sizes``
            and ``n_outs`` attributes are copied onto the DNN config.
        numpy_rng: optional numpy random state. When ``None`` a fresh
            ``numpy.random.RandomState()`` is created.

    Returns:
        The ``SRBM`` instance constructed from ``rbm_cfg`` and the DNN.
    """
    rng = numpy.random.RandomState() if numpy_rng is None else numpy_rng
    # The Theano stream is seeded from the numpy generator.
    stream_rng = RandomStreams(rng.randint(2 ** 30))

    # Following pdnn, initialize a parallel DNN and use it to initialize SRBM.
    shadow_cfg = NetworkConfig()
    for field in ('n_ins', 'hidden_layers_sizes', 'n_outs'):
        setattr(shadow_cfg, field, getattr(rbm_cfg, field))
    shadow_dnn = DNN(rng, theano_rng=stream_rng, cfg=shadow_cfg)

    return SRBM(rng, theano_rng=stream_rng, cfg=rbm_cfg, dnn=shadow_dnn)
def get_dnn_cfg(dnn_fname):
    """Construct the minimum required NetworkConfig from a model file.

    The model file is loaded with ``io.json_load`` and scanned for the
    consecutive keys ``W0``, ``W1``, ...; each value is decoded with
    ``string_2_array`` and only its ``.shape`` is used:

    * the first dimension of ``W0`` gives ``n_ins`` (plus the first
      dimension of ``side_W0_0`` when the first layer is factored);
    * the second dimension of every layer except the last is appended to
      ``hidden_layers_sizes``;
    * the second dimension of the last layer gives ``n_outs``.

    Args:
        dnn_fname: path of the model file to inspect.

    Returns:
        The populated ``NetworkConfig``.

    Raises:
        AssertionError: if a ``side_b0_j`` entry has no matching
            ``side_W0_j`` entry.
    """
    params = io.json_load(dnn_fname)
    cfg = NetworkConfig()
    cfg.hidden_layers_sizes = []

    layer = 0
    while True:
        weight_key = 'W{}'.format(layer)
        if weight_key not in params:
            break
        w_shape = string_2_array(params[weight_key]).shape

        # Currently factored layer can only be the first hidden layer
        # TODO: change this!
        if layer == 0:
            if 'side_W{}_0'.format(layer) not in params:
                # Plain (non-factored) first layer.
                cfg.n_ins = w_shape[0]
            else:
                factored = FactoredConfig()
                branch = 0
                while 'side_b{}_{}'.format(layer, branch) in params:
                    side_key = 'side_W{}_{}'.format(layer, branch)
                    assert side_key in params
                    side_shape = string_2_array(params[side_key]).shape
                    if branch == 0:
                        factored.n_in_main = w_shape[0]
                        factored.n_in_side = side_shape[0]
                        # NOTE: this assumes that main and secondary features
                        # are disjoint, but this is not required by the model.
                        # TODO: find a way to relax this assumption.
                        cfg.n_ins = w_shape[0] + side_shape[0]
                    factored.side_layers.append(side_shape[1])
                    branch += 1
                cfg.factored_cfg = factored

        # A following weight matrix means this layer is hidden; otherwise it
        # is the output layer.
        if 'W{}'.format(layer + 1) in params:
            cfg.hidden_layers_sizes.append(w_shape[1])
        else:
            cfg.n_outs = w_shape[1]
        layer += 1

    return cfg
'train_data', 'valid_data', 'nnet_spec', 'conv_nnet_spec', 'wdir' ] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] conv_nnet_spec = arguments['conv_nnet_spec'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.model_type = 'CNN_LACEA' cfg.parse_config_cnn(arguments, '10:' + nnet_spec, conv_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp_CNN_LACEA') and os.path.exists( wdir + '/training_state.tmp_CNN_LACEA'): resume_training = True cfg.lrate = _file2lrate(wdir + '/training_state.tmp_CNN_LACEA') log('> ... found nnet.tmp_CNN_LACEA and training_state.tmp_CNN_LACEA, now resume training from epoch ' + str(cfg.lrate.epoch)) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30))
dnn_array = [] # parse data specification train_data_spec_array = parse_data_spec_mtl(train_data_spec) valid_data_spec_array = parse_data_spec_mtl(valid_data_spec) if len(train_data_spec_array) != task_number or len(valid_data_spec_array) != task_number: print len(train_data_spec_array) print task_number print "Error: #datasets in data specification doesn't match #tasks"; exit(1) # split shared_spec ans indiv_spec into individual task's networks nnet_spec_array, shared_layers_num = parse_nnet_spec_mtl(shared_spec, indiv_spec) if len(nnet_spec_array) != task_number: print "Error: #networks specified by --indiv-spec doesn't match #tasks"; exit(1) # parse network configuration from arguments, and initialize data reading for n in xrange(task_number): network_config = NetworkConfig() network_config.parse_config_dnn(arguments, nnet_spec_array[n]) network_config.init_data_reading(train_data_spec_array[n], valid_data_spec_array[n]) config_array.append(network_config) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) resume_training = False; resume_tasks = [] # if we are resuming training, then MLT only operates on the terminated tasks for n in xrange(task_number): log('> ... building the model for task %d' % (n)) cfg = config_array[n] # set up the model dnn_shared = None; shared_layers = [] if n > 0: dnn_shared = dnn_array[0]; shared_layers = [m for m in xrange(shared_layers_num)] print shared_layers
wdir = arguments['wdir'] # numpy random generator numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... initializing the model') # parse network configuration from arguments, and initialize data reading cfg = RBMConfig() cfg.parse_config_common(arguments) cfg.init_data_reading(train_data_spec) # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons: # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically # second, DNN generates cfg_dnn = NetworkConfig() cfg_dnn.n_ins = cfg.n_ins cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes cfg_dnn.n_outs = cfg.n_outs dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn) # now set up the RBM model with dnn as an argument srbm = SRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg, dnn=dnn) # get the pre-training function log('> ... getting the pre-training functions') pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x, batch_size=cfg.batch_size, k=1, weight_cost=0.0002) start_layer_index = 0
arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'conv_nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] conv_nnet_spec = arguments['conv_nnet_spec'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig(); cfg.model_type = 'CNNV' cfg.parse_config_cnn(arguments, '10:' + nnet_spec, conv_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0; ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] ptr_layer_number = int(arguments['ptr_layer_number']) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): resume_training = True cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
'train_data', 'valid_data', 'nnet_spec', 'lstm_nnet_spec', 'wdir' ] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] lstm_nnet_spec = arguments['lstm_nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.model_type = 'ATTEND_LSTM' cfg.parse_config_attend(arguments, nnet_spec, lstm_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) print 'Extra dim: ' + str(cfg.extra_dim) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... building the model') # setup model dnn = PhaseATTEND_LSTM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg) # get the training, validation and testing function for the model log('> ... getting the finetuning functions') train_fn, valid_fn = dnn.build_finetune_functions( (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] si_nnet_spec = arguments['si_nnet_spec'] adapt_nnet_spec = arguments['adapt_nnet_spec'] wdir = arguments['wdir'] init_model_file = arguments['init_model'] # parse network configuration from arguments, and initialize data reading cfg_si = NetworkConfig() cfg_si.parse_config_dnn(arguments, si_nnet_spec) cfg_si.init_data_reading(train_data_spec, valid_data_spec) # parse the structure of the i-vector network cfg_adapt = NetworkConfig() # net_split = adapt_nnet_spec.split(':') # adapt_nnet_spec = '' # for n in xrange(len(net_split) - 1): # adapt_nnet_spec += net_split[n] + ':' # cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + '0') cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + ':0') numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... initializing the model') # setup up the model
arguments = parse_arguments(arg_elements) required_arguments = ["train_data", "valid_data", "nnet_spec", "conv_nnet_spec", "wdir"] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments["train_data"] valid_data_spec = arguments["valid_data"] conv_nnet_spec = arguments["conv_nnet_spec"] nnet_spec = arguments["nnet_spec"] wdir = arguments["wdir"] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.model_type = "CNN" cfg.parse_config_cnn(arguments, "10:" + nnet_spec, conv_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0 ptr_file = "" if arguments.has_key("ptr_file") and arguments.has_key("ptr_layer_number"): ptr_file = arguments["ptr_file"] ptr_layer_number = int(arguments["ptr_layer_number"]) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + "/nnet.tmp") and os.path.exists(wdir + "/training_state.tmp"):
for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] si_nnet_spec = arguments['si_nnet_spec'] si_conv_nnet_spec = arguments['si_conv_nnet_spec'] adapt_nnet_spec = arguments['adapt_nnet_spec'] wdir = arguments['wdir'] init_model_file = arguments['init_model'] # parse network configuration from arguments, and initialize data reading cfg_si = NetworkConfig() cfg_si.model_type = 'CNN' cfg_si.parse_config_cnn(arguments, '10:' + si_nnet_spec, si_conv_nnet_spec) cfg_si.init_data_reading(train_data_spec, valid_data_spec) # parse the structure of the i-vector network cfg_adapt = NetworkConfig() net_split = adapt_nnet_spec.split(':') adapt_nnet_spec = '' for n in xrange(len(net_split) - 1): adapt_nnet_spec += net_split[n] + ':' cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + '0') numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... initializing the model')
arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'lstm_nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] lstm_nnet_spec = arguments['lstm_nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig();cfg.model_type = 'ATTEND_LSTM' cfg.parse_config_attend(arguments, nnet_spec, lstm_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... building the model') # setup model dnn = ATTEND_LSTM(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg) # get the training, validation and testing function for the model log('> ... getting the finetuning functions') train_fn, valid_fn = dnn.build_finetune_functions( (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y), batch_size=cfg.batch_size)
arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'nnet_spec_tower1', 'nnet_spec_tower2', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] nnet_spec_tower1 = arguments['nnet_spec_tower1'] nnet_spec_tower2 = arguments['nnet_spec_tower2'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg_tower1 = NetworkConfig(); cfg_tower1.parse_config_dnn(arguments, nnet_spec_tower1 + ":0") cfg_tower2 = NetworkConfig(); cfg_tower2.parse_config_dnn(arguments, nnet_spec_tower2 + ":0") cfg = NetworkConfig(); cfg.parse_config_dnn(arguments, str(cfg_tower1.hidden_layers_sizes[-1] + cfg_tower2.hidden_layers_sizes[-1]) + ":" + nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0; ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] ptr_layer_number = int(arguments['ptr_layer_number']) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): resume_training = True
arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'lstm_nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] lstm_nnet_spec = arguments['lstm_nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig();cfg.model_type = 'LSTMV' cfg.parse_config_ldnn(arguments, nnet_spec, lstm_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... building the model') # setup model dnn = LSTMV(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg) # get the training, validation and testing function for the model log('> ... getting the finetuning functions') train_fn, valid_fn = dnn.build_finetune_functions( (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y), batch_size=cfg.batch_size)
arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'si_nnet_spec', 'wdir', 'adapt_nnet_spec', 'init_model'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data']; valid_data_spec = arguments['valid_data'] si_nnet_spec = arguments['si_nnet_spec'] adapt_nnet_spec = arguments['adapt_nnet_spec']; wdir = arguments['wdir'] init_model_file = arguments['init_model'] # parse network configuration from arguments, and initialize data reading cfg_si = NetworkConfig() cfg_si.parse_config_dnn(arguments, si_nnet_spec) cfg_si.init_data_reading(train_data_spec, valid_data_spec) # parse the structure of the i-vector network cfg_adapt = NetworkConfig() # net_split = adapt_nnet_spec.split(':') # adapt_nnet_spec = '' # for n in xrange(len(net_split) - 1): # adapt_nnet_spec += net_split[n] + ':' # cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + '0') cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + ':0') numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... initializing the model') # setup up the model
for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments["train_data"] valid_data_spec = arguments["valid_data"] si_nnet_spec = arguments["si_nnet_spec"] si_conv_nnet_spec = arguments["si_conv_nnet_spec"] adapt_nnet_spec = arguments["adapt_nnet_spec"] wdir = arguments["wdir"] init_model_file = arguments["init_model"] # parse network configuration from arguments, and initialize data reading cfg_si = NetworkConfig() cfg_si.model_type = "CNN" cfg_si.parse_config_cnn(arguments, "10:" + si_nnet_spec, si_conv_nnet_spec) cfg_si.init_data_reading(train_data_spec, valid_data_spec) # parse the structure of the i-vector network cfg_adapt = NetworkConfig() net_split = adapt_nnet_spec.split(":") adapt_nnet_spec = "" for n in xrange(len(net_split) - 1): adapt_nnet_spec += net_split[n] + ":" cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + "0") numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log("> ... initializing the model")
# check the arguments arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig();cfg.model_type = 'DNNV' cfg.parse_config_dnn(arguments, nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0; ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] ptr_layer_number = int(arguments['ptr_layer_number']) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): resume_training = True cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] conv_nnet_spec = arguments['conv_nnet_spec'] lstm_nnet_spec = arguments['lstm_nnet_spec'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.model_type = 'CLDNNV' cfg.parse_config_cldnn(arguments, nnet_spec, conv_nnet_spec, lstm_nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... building the model') # setup model dnn = CLDNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg) # get the training, validation and testing function for the model log('> ... getting the finetuning functions') train_fn, valid_fn = dnn.build_finetune_functions( (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'si_nnet_spec', 'si_conv_nnet_spec', 'wdir', 'adapt_nnet_spec', 'init_model'] for arg in required_arguments: if (arg in arguments) == False: print("Error: the argument %s has to be specified" % (arg)); exit(1) # mandatory arguments train_data_spec = arguments['train_data']; valid_data_spec = arguments['valid_data'] si_nnet_spec = arguments['si_nnet_spec'] si_conv_nnet_spec = arguments['si_conv_nnet_spec'] adapt_nnet_spec = arguments['adapt_nnet_spec']; wdir = arguments['wdir'] init_model_file = arguments['init_model'] # parse network configuration from arguments, and initialize data reading cfg_si = NetworkConfig(); cfg_si.model_type = 'CNN' cfg_si.parse_config_cnn(arguments, '10:' + si_nnet_spec, si_conv_nnet_spec) cfg_si.init_data_reading(train_data_spec, valid_data_spec) # parse the structure of the i-vector network cfg_adapt = NetworkConfig() net_split = adapt_nnet_spec.split(':') adapt_nnet_spec = '' for n in range(len(net_split) - 1): adapt_nnet_spec += net_split[n] + ':' cfg_adapt.parse_config_dnn(arguments, adapt_nnet_spec + '0') numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... initializing the model') # setup up the model
# check the arguments arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.parse_config_dnn(arguments, nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0; ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] ptr_layer_number = int(arguments['ptr_layer_number']) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): resume_training = True cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
def dnn_run(arguments):
    """Fine-tune a feed-forward DNN as described by ``arguments``.

    Steps, in order: check the mandatory arguments, build a
    ``NetworkConfig`` and initialize data reading, restore parameters from
    a checkpoint in ``wdir`` or from a pre-training file, run SGD epochs
    until the learning-rate schedule reports a rate of 0, and finally write
    the model parameters and configuration when output paths are set on the
    config.

    Args:
        arguments: mapping from option name to value. The keys
            'train_data', 'valid_data', 'nnet_spec' and 'wdir' are
            mandatory. 'ptr_file' and 'ptr_layer_number' are optional and
            are only honoured when both are present.

    Returns:
        None.

    Raises:
        SystemExit: with status 1 when a mandatory argument is missing.
    """
    # sys.exit() is the supported API; the bare exit() helper is injected by
    # the site module and is not guaranteed to exist.
    import sys

    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        # `in` and print(...) behave the same on Python 2 and Python 3,
        # unlike dict.has_key() and the print statement.
        if arg not in arguments:
            print("Error: the argument %s has to be specified" % (arg))
            sys.exit(1)

    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']

    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training files and layer number (how many layers are set to the pre-training parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if 'ptr_file' in arguments and 'ptr_layer_number' in arguments:
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    nnet_tmp = wdir + '/nnet.tmp'
    state_tmp = wdir + '/training_state.tmp'
    resume_training = os.path.exists(nnet_tmp) and os.path.exists(state_tmp)
    if resume_training:
        cfg.lrate = _file2lrate(state_tmp)
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if resuming training, initialized from the tmp model file
    # if not resuming training, initialized from the specified pre-training file
    if resume_training:
        _file2nnet(dnn.layers, filename=nnet_tmp)
    elif ptr_layer_number > 0:
        _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
        batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while cfg.lrate.get_rate() != 0:
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        valid_percent = 100 * numpy.mean(valid_error)
        log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), valid_percent) + '(%)')
        # the schedule consumes the validation error to pick the next rate
        cfg.lrate.get_next_rate(current_error=valid_percent)
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=nnet_tmp)
            _lrate2file(cfg.lrate, state_tmp)

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg); exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] multi_label = arguments['multi_label'] if multi_label=="true": multi_label = True else: multi_label = False # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig(multi_label) cfg.parse_config_dnn(arguments, nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0; ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] ptr_layer_number = int(arguments['ptr_layer_number']) # check working dir to see whether it's resuming training resume_training = False if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): resume_training = True cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
def dnn_run(arguments):
    """Fine-tune a feed-forward DNN as described by ``arguments``.

    Steps, in order: check the mandatory arguments, build a
    ``NetworkConfig`` and initialize data reading, restore parameters from
    a checkpoint in ``wdir`` or from a pre-training file, run SGD epochs
    until the learning-rate schedule reports a rate of 0, and finally write
    the model parameters and configuration when output paths are set on the
    config.

    Args:
        arguments: mapping from option name to value. The keys
            'train_data', 'valid_data', 'nnet_spec' and 'wdir' are
            mandatory. 'ptr_file' and 'ptr_layer_number' are optional and
            are only honoured when both are present.

    Returns:
        None.

    Raises:
        SystemExit: with status 1 when a mandatory argument is missing.
    """
    # sys.exit() is the supported API; the bare exit() helper is injected by
    # the site module and is not guaranteed to exist.
    import sys

    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        # `in` and print(...) behave the same on Python 2 and Python 3,
        # unlike dict.has_key() and the print statement.
        if arg not in arguments:
            print("Error: the argument %s has to be specified" % (arg))
            sys.exit(1)

    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']

    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training files and layer number (how many layers are set to the pre-training parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if 'ptr_file' in arguments and 'ptr_layer_number' in arguments:
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    nnet_tmp = wdir + '/nnet.tmp'
    state_tmp = wdir + '/training_state.tmp'
    resume_training = os.path.exists(nnet_tmp) and os.path.exists(state_tmp)
    if resume_training:
        cfg.lrate = _file2lrate(state_tmp)
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if resuming training, initialized from the tmp model file
    # if not resuming training, initialized from the specified pre-training file
    if resume_training:
        _file2nnet(dnn.layers, filename=nnet_tmp)
    elif ptr_layer_number > 0:
        _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
        batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while cfg.lrate.get_rate() != 0:
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        valid_percent = 100 * numpy.mean(valid_error)
        log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), valid_percent) + '(%)')
        # the schedule consumes the validation error to pick the next rate
        cfg.lrate.get_next_rate(current_error=valid_percent)
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=nnet_tmp)
            _lrate2file(cfg.lrate, state_tmp)

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers, filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor, factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
wdir = arguments['wdir'] # numpy random generator numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... initializing the model') # parse network configuration from arguments, and initialize data reading cfg = RBMConfig() cfg.parse_config_common(arguments) cfg.init_data_reading(train_data_spec) # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons: # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically # second, DNN generates cfg_dnn = NetworkConfig() cfg_dnn.n_ins = cfg.n_ins; cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes; cfg_dnn.n_outs = cfg.n_outs dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg_dnn) # now set up the RBM model with dnn as an argument srbm = SRBM(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, dnn = dnn) # get the pre-training function log('> ... getting the pre-training functions') pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x, batch_size=cfg.batch_size, k = 1, weight_cost = 0.0002) start_layer_index = 0 start_epoch_index = 0 if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): start_layer_index, start_epoch_index = read_two_integers(wdir + '/training_state.tmp') log('> ... found nnet.tmp and training_state.tmp, now resume training from layer #' + str(start_layer_index) + ' epoch #' + str(start_epoch_index))
arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))] arguments = parse_arguments(arg_elements) required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir'] for arg in required_arguments: if arguments.has_key(arg) == False: print "Error: the argument %s has to be specified" % (arg) exit(1) # mandatory arguments train_data_spec = arguments['train_data'] valid_data_spec = arguments['valid_data'] nnet_spec = arguments['nnet_spec'] wdir = arguments['wdir'] # parse network configuration from arguments, and initialize data reading cfg = NetworkConfig() cfg.parse_config_dnn(arguments, nnet_spec) cfg.init_data_reading(train_data_spec, valid_data_spec) if arguments.has_key('replicate'): cfg.replicate = int(arguments['replicate']) # parse pre-training options # pre-training files and layer number (how many layers are set to the pre-training parameters) ptr_layer_number = 0 ptr_file = '' if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'): ptr_file = arguments['ptr_file'] temp = arguments['ptr_layer_number'].split(':') if len(temp) > 1 or len(temp[0].split(',')) > 1: ptr_layer_number = [map(int, i.split(',')) for i in temp]