def split_data(io_train_list, io_test_list, train_speaker_ID, test_speaker_ID, n_ins, n_outs, buffer_size, plot=False):
    """Split pooled train/test feature lists into per-speaker data readers.

    io_train_list / io_test_list : (input_list, output_list) tuples holding
        the pooled file lists for all speakers, concatenated speaker after
        speaker.
    train_speaker_ID / test_speaker_ID : per-speaker item counts; entry k is
        how many consecutive entries of the pooled lists belong to speaker k.
    n_ins, n_outs : feature dimensionalities forwarded to VCTK_feat_collection.
    buffer_size : reader buffer size forwarded to VCTK_feat_collection.
    plot : unused; kept for interface compatibility.

    Returns a list with one (train_reader, test_reader) tuple per speaker.
    Training readers shuffle their data; test readers do not.
    """
    (i_train_list, o_train_list) = io_train_list
    (i_test_list, o_test_list) = io_test_list
    n_speakers = len(test_speaker_ID)
    # Walk the pooled lists with cumulative offsets instead of the previous
    # copy-pasted `if n_speakers == 1 ... elif == 5` ladder.  This also removes
    # the hard 5-speaker cap (any other count used to leave `speakers_data`
    # unbound and raise NameError at the return below).
    speakers_data = []
    lwr_train = 0
    lwr_test = 0
    for spk in range(n_speakers):
        upr_train = lwr_train + train_speaker_ID[spk]
        upr_test = lwr_test + test_speaker_ID[spk]
        print("Train Data Speaker %d" % (spk + 1))
        train_data_reader = VCTK_feat_collection(
            i_list=i_train_list[lwr_train:upr_train],
            o_list=o_train_list[lwr_train:upr_train],
            n_ins=n_ins,
            n_outs=n_outs,
            buffer_size=buffer_size,
            shuffle=True)
        print("Test Data Speaker %d" % (spk + 1))
        test_data_reader = VCTK_feat_collection(
            i_list=i_test_list[lwr_test:upr_test],
            o_list=o_test_list[lwr_test:upr_test],
            n_ins=n_ins,
            n_outs=n_outs,
            buffer_size=buffer_size,
            shuffle=False)
        speakers_data.append((train_data_reader, test_data_reader))
        # Next speaker's slice starts where this one ended.
        lwr_train = upr_train
        lwr_test = upr_test
    return speakers_data
def train_DNN( io_train_list, io_test_list, n_ins, n_outs, n_speakers, buffer_size, plot=False):
    """Fine-tune a DNN acoustic model on the given train/test file lists.

    io_train_list / io_test_list : (input_list, output_list) tuples of feature
        file lists consumed by VCTK_feat_collection.
    n_ins, n_outs : input / output feature dimensionalities of the network.
    n_speakers : forwarded to the data readers and printed in the summary.
    buffer_size : requested partition size in frames; rounded down below to a
        whole number of minibatches.
    plot : unused; kept for interface compatibility.

    Side effects: pickles the best model to "output.data" and prints progress.
    Returns the best (lowest) validation loss observed during training.
    """
    #################consider what is nnest_file_name about
    print "Starting train_DNN"
    ##################Setting parameters####
    #n_ins = 601
    #n_outs = 259
    n_speakers = n_speakers  # no-op rebind, kept as-is
    #buffer_size = 200000
    buffer_size = buffer_size  # no-op rebind, kept as-is
    #learning_rate = 0.012
    learning_rate = 0.0002
    finetune_lr = learning_rate
    training_epochs=100
    batch_size = 256
    l1_reg = 0.0 #L1_regularization
    l2_reg = 0.00001 #L2_regularization
    private_l2_reg = 0.00001
    warmup_epoch = 5
    #warmup_epoch = 10
    momentum = 0.9
    warmup_momentum = 0.3
    hidden_layers_sizes = [512,512,512,512,512,512]
    stream_weights = [1.0]
    private_hidden_sizes = [1024]
    buffer_utt_size = 400
    early_stop_epoch = 5  # epochs of rising validation loss tolerated before stopping
    hidden_activation = 'tanh'
    output_activation = 'linear'
    #stream_lr_weights
    #use_private_hidden
    model_type = 'DNN'
    #self.speaker_ID= speaker_ID
    #print "Speaker ID :", self.speaker_ID
    ## use a switch to turn on pretraining
    ## pretraining may not help too much, if this case, we turn it off to save time
    do_pretraining = False
    pretraining_epochs = 10
    pretraining_lr = 0.0001
    # Round the buffer down to a whole number of minibatches.
    buffer_size = int(buffer_size / batch_size) * batch_size
    ###################
    (i_train_list, o_train_list) = io_train_list
    (i_test_list, o_test_list) = io_test_list
    print "Building training data provider"
    train_data_reader = VCTK_feat_collection(i_list=i_train_list, o_list= o_train_list, n_ins= n_ins, n_outs = n_outs, n_speakers= n_speakers, buffer_size = buffer_size, shuffle = True)
    print "Building testing data provider"
    test_data_reader = VCTK_feat_collection(i_list=i_test_list, o_list= o_test_list, n_ins= n_ins, n_outs = n_outs, n_speakers= n_speakers, buffer_size = buffer_size, shuffle = False)
    # The first partition is loaded only to obtain the shared-variable pair;
    # both readers are rewound immediately below.
    io_shared_train_set, i_temp_train_set, o_temp_train_set = train_data_reader.load_next_partition()
    i_train_set, o_train_set = io_shared_train_set
    io_shared_test_set, i_temp_test_set, o_temp_test_set = test_data_reader.load_next_partition()
    i_test_set, o_test_set = io_shared_test_set
    train_data_reader.reset()
    test_data_reader.reset()
    ##temporally we use the training set as pretrain_set_x.
    ##we need to support any data for pretraining
    i_pretrain_set = i_train_set
    # numpy random generator (fixed seed, so weight init is reproducible)
    numpy_rng = numpy.random.RandomState(123)
    print "Buiding the model"
    dnn_model = None
    pretrain_fn = None ## not all the model support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None ## valid_fn and valid_model are the same. reserve to computer multi-stream distortion
    if model_type == 'DNN':
        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs = n_outs, l1_reg = l1_reg, l2_reg = l2_reg, hidden_layers_sizes = hidden_layers_sizes, hidden_activation = hidden_activation, output_activation = output_activation)
        train_fn, valid_fn = dnn_model.build_finetune_functions( (i_train_set, o_train_set), (i_test_set, o_test_set), batch_size=batch_size)
    print "Fine-tuning the ", model_type, "model"
    start_time = time.clock()
    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max
    early_stop = 0
    epoch = 0
    previous_finetune_lr = finetune_lr
    while (epoch < training_epochs):
        epoch = epoch + 1
        # Warmup epochs: reduced momentum at the base LR; afterwards the LR
        # is halved every epoch.
        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5
        previous_finetune_lr = current_finetune_lr
        train_error = []
        sub_start_time = time.clock()
        # Stream the training data partition by partition through the shared
        # variables; each partition is split into fixed-size minibatches.
        while (not train_data_reader.is_finish()):
            io_shared_train_set, i_temp_train_set, o_temp_train_set = train_data_reader.load_next_partition()
            i_train_set.set_value(numpy.asarray(i_temp_train_set, dtype=theano.config.floatX), borrow=True)
            o_train_set.set_value(numpy.asarray(o_temp_train_set, dtype=theano.config.floatX), borrow=True)
            n_train_batches = i_train_set.get_value().shape[0] / batch_size
            print " This partition :",i_train_set.get_value(borrow=True).shape[0], "frames (divided into ", n_train_batches, "of size", batch_size
            for minibatch_index in xrange(n_train_batches):
                this_train_error = train_fn(minibatch_index, current_finetune_lr, current_momentum)
                train_error.append(this_train_error)
                if numpy.isnan(this_train_error):
                    print "Training error over minibatch ", minibatch_index+1, " of ", n_train_batches, " was ", this_train_error
        train_data_reader.reset()
        print 'Calculating validation loss'
        validation_losses = valid_fn()
        this_validation_loss = numpy.mean(validation_losses)
        # this has a possible bias if the minibatches were not all of identical size
        # but it should not be siginficant if minibatches are small
        this_train_valid_loss = numpy.mean(train_error)
        sub_end_time = time.clock()
        loss_difference = this_validation_loss - previous_loss
        print "Epoch :", epoch, " Validation Error :", this_validation_loss, " Train Error :", this_train_valid_loss, " Time Spent : ",(sub_end_time - sub_start_time)
        if this_validation_loss < best_validation_loss:
            # NOTE(review): dnn_model is updated in place by train_fn, so this
            # alias does not snapshot the weights at this epoch — verify.
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            print "Validation loss decreased, so saving model"
        if this_validation_loss >= previous_loss:
            print "Validation loss increased"
            dbn = best_dnn_model
            early_stop += 1
        if early_stop > early_stop_epoch:
            print "Stopping early"
            break
        if math.isnan(this_validation_loss):
            break
        previous_loss = this_validation_loss
    end_time = time.clock()
    ##### WATCH OUT FOR THIS LINE (was: "OJO CON ESTA LINEA") #####
    # NOTE(review): hard-coded output path and the file handle is never closed.
    cPickle.dump(best_dnn_model, open("output.data", 'wb'))
    print "Overall training time : ", ((end_time - start_time) / 60.), " Validation Error :", best_validation_loss
    print "Architecture Training :",hidden_layers_sizes
    print "Number of speakers :", n_speakers,
    return best_validation_loss
# NOTE(review): this second definition of train_DNN is a reformatted duplicate
# of the one defined earlier in this file (identical tokens, autopep8-style
# whitespace).  Because it appears later in the module, THIS definition is the
# one that ends up bound to the name — consider deleting one of the two copies.
def train_DNN(io_train_list, io_test_list, n_ins, n_outs, n_speakers, buffer_size, plot=False):
    """Fine-tune a DNN acoustic model on the given train/test file lists.

    Duplicate (reformatted) of the earlier train_DNN in this file; see the
    note above.  Side effects: pickles the best model to "output.data" and
    prints progress.  Returns the best (lowest) validation loss observed.
    """
    #################consider what is nnest_file_name about
    print "Starting train_DNN"
    ##################Setting parameters####
    #n_ins = 601
    #n_outs = 259
    n_speakers = n_speakers  # no-op rebind, kept as-is
    #buffer_size = 200000
    buffer_size = buffer_size  # no-op rebind, kept as-is
    #learning_rate = 0.012
    learning_rate = 0.0002
    finetune_lr = learning_rate
    training_epochs = 100
    batch_size = 256
    l1_reg = 0.0 #L1_regularization
    l2_reg = 0.00001 #L2_regularization
    private_l2_reg = 0.00001
    warmup_epoch = 5
    #warmup_epoch = 10
    momentum = 0.9
    warmup_momentum = 0.3
    hidden_layers_sizes = [512, 512, 512, 512, 512, 512]
    stream_weights = [1.0]
    private_hidden_sizes = [1024]
    buffer_utt_size = 400
    early_stop_epoch = 5  # epochs of rising validation loss tolerated before stopping
    hidden_activation = 'tanh'
    output_activation = 'linear'
    #stream_lr_weights
    #use_private_hidden
    model_type = 'DNN'
    #self.speaker_ID= speaker_ID
    #print "Speaker ID :", self.speaker_ID
    ## use a switch to turn on pretraining
    ## pretraining may not help too much, if this case, we turn it off to save time
    do_pretraining = False
    pretraining_epochs = 10
    pretraining_lr = 0.0001
    # Round the buffer down to a whole number of minibatches.
    buffer_size = int(buffer_size / batch_size) * batch_size
    ###################
    (i_train_list, o_train_list) = io_train_list
    (i_test_list, o_test_list) = io_test_list
    print "Building training data provider"
    train_data_reader = VCTK_feat_collection(i_list=i_train_list, o_list=o_train_list, n_ins=n_ins, n_outs=n_outs, n_speakers=n_speakers, buffer_size=buffer_size, shuffle=True)
    print "Building testing data provider"
    test_data_reader = VCTK_feat_collection(i_list=i_test_list, o_list=o_test_list, n_ins=n_ins, n_outs=n_outs, n_speakers=n_speakers, buffer_size=buffer_size, shuffle=False)
    # The first partition is loaded only to obtain the shared-variable pair;
    # both readers are rewound immediately below.
    io_shared_train_set, i_temp_train_set, o_temp_train_set = train_data_reader.load_next_partition()
    i_train_set, o_train_set = io_shared_train_set
    io_shared_test_set, i_temp_test_set, o_temp_test_set = test_data_reader.load_next_partition()
    i_test_set, o_test_set = io_shared_test_set
    train_data_reader.reset()
    test_data_reader.reset()
    ##temporally we use the training set as pretrain_set_x.
    ##we need to support any data for pretraining
    i_pretrain_set = i_train_set
    # numpy random generator (fixed seed, so weight init is reproducible)
    numpy_rng = numpy.random.RandomState(123)
    print "Buiding the model"
    dnn_model = None
    pretrain_fn = None ## not all the model support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None ## valid_fn and valid_model are the same. reserve to computer multi-stream distortion
    if model_type == 'DNN':
        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, l1_reg=l1_reg, l2_reg=l2_reg, hidden_layers_sizes=hidden_layers_sizes, hidden_activation=hidden_activation, output_activation=output_activation)
        train_fn, valid_fn = dnn_model.build_finetune_functions((i_train_set, o_train_set), (i_test_set, o_test_set), batch_size=batch_size)
    print "Fine-tuning the ", model_type, "model"
    start_time = time.clock()
    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max
    early_stop = 0
    epoch = 0
    previous_finetune_lr = finetune_lr
    while (epoch < training_epochs):
        epoch = epoch + 1
        # Warmup epochs: reduced momentum at the base LR; afterwards the LR
        # is halved every epoch.
        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5
        previous_finetune_lr = current_finetune_lr
        train_error = []
        sub_start_time = time.clock()
        # Stream the training data partition by partition through the shared
        # variables; each partition is split into fixed-size minibatches.
        while (not train_data_reader.is_finish()):
            io_shared_train_set, i_temp_train_set, o_temp_train_set = train_data_reader.load_next_partition()
            i_train_set.set_value(numpy.asarray(i_temp_train_set, dtype=theano.config.floatX), borrow=True)
            o_train_set.set_value(numpy.asarray(o_temp_train_set, dtype=theano.config.floatX), borrow=True)
            n_train_batches = i_train_set.get_value().shape[0] / batch_size
            print " This partition :", i_train_set.get_value(borrow=True).shape[0], "frames (divided into ", n_train_batches, "of size", batch_size
            for minibatch_index in xrange(n_train_batches):
                this_train_error = train_fn(minibatch_index, current_finetune_lr, current_momentum)
                train_error.append(this_train_error)
                if numpy.isnan(this_train_error):
                    print "Training error over minibatch ", minibatch_index + 1, " of ", n_train_batches, " was ", this_train_error
        train_data_reader.reset()
        print 'Calculating validation loss'
        validation_losses = valid_fn()
        this_validation_loss = numpy.mean(validation_losses)
        # this has a possible bias if the minibatches were not all of identical size
        # but it should not be siginficant if minibatches are small
        this_train_valid_loss = numpy.mean(train_error)
        sub_end_time = time.clock()
        loss_difference = this_validation_loss - previous_loss
        print "Epoch :", epoch, " Validation Error :", this_validation_loss, " Train Error :", this_train_valid_loss, " Time Spent : ", (sub_end_time - sub_start_time)
        if this_validation_loss < best_validation_loss:
            # NOTE(review): dnn_model is updated in place by train_fn, so this
            # alias does not snapshot the weights at this epoch — verify.
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            print "Validation loss decreased, so saving model"
        if this_validation_loss >= previous_loss:
            print "Validation loss increased"
            dbn = best_dnn_model
            early_stop += 1
        if early_stop > early_stop_epoch:
            print "Stopping early"
            break
        if math.isnan(this_validation_loss):
            break
        previous_loss = this_validation_loss
    end_time = time.clock()
    ##### WATCH OUT FOR THIS LINE (was: "OJO CON ESTA LINEA") #####
    # NOTE(review): hard-coded output path and the file handle is never closed.
    cPickle.dump(best_dnn_model, open("output.data", 'wb'))
    print "Overall training time : ", ((end_time - start_time) / 60.), " Validation Error :", best_validation_loss
    print "Architecture Training :", hidden_layers_sizes
    print "Number of speakers :", n_speakers,
    return best_validation_loss