def train_model(model_class, input_dimensions, hidden_size, batch_size, truncated_len,
                num_epochs, model_name, print_period=50, save_period=50, log_period=50,
                n_files_per_epoch=5, sparsify_epochs=None, sparsity_level=1,
                wav_fnames=None, align_fnames=None):
    """Train a `model_class` network on wav data (optionally with text alignments).

    Parameters
    ----------
    model_class : callable
        Constructor taking ``(input_dimensions, hidden_size)`` and returning a
        model with ``train``, ``calc_sparsity_level`` and ``sparsify`` methods.
    input_dimensions, hidden_size : model dimensions forwarded to `model_class`.
    batch_size, truncated_len : minibatch geometry passed to ``sl.get_train_test``.
    num_epochs : int
        Number of training iterations.
    model_name : str
        Subdirectory of ``DIRS['MODELS']`` used for checkpoints.
    print_period, save_period : int
        Iteration periods for console/plot updates and checkpointing.
    log_period : int
        Unused; kept for backward compatibility with existing callers.
    n_files_per_epoch : int
        How many wav files to load each time the data pool is exhausted.
    sparsify_epochs : list[int] | None
        Epochs at which the model weights are sparsified (default: never).
    sparsity_level : sparsity target forwarded to ``model.calc_sparsity_level``.
    wav_fnames : iterable of paths or None
        Wav files to train on; defaults to all ``*.wav`` under ``DIRS['RAW_DATA']``.
    align_fnames : iterable of paths or None
        Optional alignment files; when given, one-hot text is fed to the model.

    Returns
    -------
    (train_losses, validation_losses, model)
    """
    # Create a directory for this model's checkpoints if it does not exist yet.
    if model_name not in os.listdir(DIRS['MODELS']):
        os.mkdir(DIRS['MODELS'] + model_name)

    tf.reset_default_graph()
    model = model_class(input_dimensions, hidden_size)
    init_variables = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # Avoid a mutable default argument; None means "never sparsify".
    if sparsify_epochs is None:
        sparsify_epochs = []
    if wav_fnames is None:
        wav_fnames = Path(DIRS['RAW_DATA']).rglob("*.wav")

    epochs_per_files_last = 0
    # BUG FIX: `toh` was unbound whenever align_fnames is None, but it is always
    # passed below as text_oh=toh (NameError on the first iteration). L8 shows
    # sl.get_train_test accepts text_oh as a keyword; presumably None means
    # "no text" — TODO confirm against sl.get_train_test.
    toh = None
    # Guard against num_epochs == 0, where `msg` would otherwise be unbound
    # at the final plot below.
    msg = ''

    # Initialize the losses.
    train_losses = []
    validation_losses = []

    with tf.Session() as sess:
        sess.run(init_variables)
        # Perform all the iterations.
        for epoch in tqdm_notebook(range(num_epochs)):
            # Reload a fresh pool of files once the current one is exhausted.
            if epochs_per_files_last == 0:
                X = sl.load_data(wav_fnames, n_files_per_epoch)
                if align_fnames is not None:
                    toh = sl.load_text_oh(align_fnames, n_files_per_epoch)
                total_series_length = int(X.shape[1])
                epochs_per_files_last = total_series_length // batch_size // truncated_len
            epochs_per_files_last -= 1

            if epoch in sparsify_epochs:
                k = model.calc_sparsity_level(epoch, sparsify_epochs, sparsity_level)
                model.sparsify(k, sess)

            X_train, Y_train, X_test, Y_test = sl.get_train_test(
                X, batch_size, truncated_len, sess, text_oh=toh)
            train_loss, validation_loss = model.train(X_train, Y_train, X_test, Y_test, sess)

            # Log the losses.
            train_losses.append(train_loss)
            validation_losses.append(validation_loss)

            msg = f'Iteration: {epoch}, train loss: {train_loss:.4f}, val loss: {validation_loss:.4f}'
            # Display an update periodically (the two identical conditions of the
            # original are merged; execution order is unchanged).
            if epoch % print_period == 0 and epoch != 0:
                print(msg)
                sl.plot_losses(train_losses, validation_losses, title=msg)
                plt.show()
            if epoch % save_period == 0:
                saver.save(sess, DIRS['MODELS'] + model_name + '/checkpoint',
                           global_step=epoch, write_meta_graph=True)

        sl.plot_losses(train_losses, validation_losses, title=msg)
        plt.show()
        saver.save(sess, DIRS['MODELS'] + model_name + '/final')
    return train_losses, validation_losses, model
def train_model(input_dimensions, hidden_size, batch_size, truncated_len, num_epochs, model_name, print_period=50, save_period=50, log_period=50):
    """Train a WaveGRU on all wav files under DIRS['RAW_DATA'].

    Checkpoints go to ``DIRS['MODELS'] + model_name``; returns the list of
    train losses, the list of validation losses, and the trained model.
    `log_period` is accepted but not used.
    """
    # Make sure the checkpoint directory exists.
    if model_name not in os.listdir(DIRS['MODELS']):
        os.mkdir(DIRS['MODELS'] + model_name)

    tf.reset_default_graph()
    model = WaveGRU(input_dimensions, hidden_size)
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
    wav_fnames = Path(DIRS['RAW_DATA']).rglob("*.wav")

    # Countdown until a fresh batch of files must be loaded.
    steps_until_reload = 0
    train_losses, validation_losses = [], []

    with tf.Session() as sess:
        sess.run(init_op)
        for epoch in tqdm_notebook(range(num_epochs)):
            if steps_until_reload == 0:
                # Pull the next five wav files into memory.
                X = sl.load_data(wav_fnames, 5)
                total_series_length = int(X.shape[1])
                steps_until_reload = total_series_length // batch_size // truncated_len
            steps_until_reload -= 1

            X_train, Y_train, X_test, Y_test = sl.get_train_test(X, batch_size, truncated_len, sess)
            train_loss, validation_loss = model.train(X_train, Y_train, X_test, Y_test, sess)

            # Record the losses for this iteration.
            train_losses.append(train_loss)
            validation_losses.append(validation_loss)

            # Periodic console + plot update (skipped at epoch 0).
            if epoch % print_period == 0 and epoch != 0:
                print(f'Iteration: {epoch}, train loss: {train_loss:.4f}, val loss: {validation_loss:.4f}')
                sl.plot_losses(train_losses, validation_losses, title=f'Iteration: {epoch}, train loss: {train_loss:.4f}, val loss: {validation_loss:.4f}')
                plt.show()
            # Periodic checkpoint.
            if epoch % save_period == 0:
                saver.save(sess, DIRS['MODELS'] + model_name + '/checkpoint', global_step=epoch, write_meta_graph=True)

        sl.plot_losses(train_losses, validation_losses, title='Iteration: %d, train loss: %.4f, test loss: %.4f' % (epoch, train_loss, validation_loss))
        plt.show()
        saver.save(sess, DIRS['MODELS'] + model_name + '/final')
    return train_losses, validation_losses, model
# --- Notebook cells: weight visualisation, data probing, model restore and ---
# --- sound generation. Reconstructed from a collapsed export; cell grouping ---
# --- and indentation were lost, so the block structure below is best-effort. ---

plt.tight_layout()
plt.show()

# Show the three recurrent weight matrices side by side.
for idx, m in enumerate([Ru, Rr, Re]):
    plt.subplot(1, 3, idx + 1)
    plt.imshow(m)
    plt.title(['Ru', 'Rr', 'Re'][idx])
plt.tight_layout()
plt.show()

# Same layout for the input weight matrices, as seaborn heatmaps.
for idx, m in enumerate([Iu, Ir, Ie]):
    plt.subplot(1, 3, idx + 1)
    sns.heatmap(m)
    plt.title(['Iu', 'Ir', 'Ie'][idx])
plt.tight_layout()
plt.show()

# Load one sample both as raw audio and as model input.
audio = sl.load_audio_not_one_hot(DIRS['RAW_DATA'] + 'cv_corpus_v1/cv-other-train/sample-052026.wav')
X = sl.load_data([DIRS['RAW_DATA'] + 'cv_corpus_v1/cv-other-train/sample-052026.wav'])

with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    O1, O2, O3, O4 = sess.run([gru.O1, gru.O2, gru.O3, gru.O4])
    Iu, Ir, Ie = sess.run([gru.Iu, gru.Ir, gru.Ie])
    Ru, Rr, Re = sess.run([gru.Ru, gru.Rr, gru.Re])
    audio_eval = sess.run(audio)
    X_eval = sess.run(X)
    X_train, Y_train, X_test, Y_test = sl.get_train_test(X, 10, 5000)
    # Recombines the two channels into single sample values; presumably
    # coarse*256 + fine after undoing the [-1, 1] scaling — TODO confirm.
    Y_train_audio = ((Y_train * 128 + 128)[:, :, 0]) * 256 + (Y_train * 128 + 128)[:, :, 1]
    Y_train_audio_eval = sess.run(Y_train_audio)

os.listdir(DIRS['RAW_DATA'] + 'cv_corpus_v1/cv-other-train')

# Overlay the real audio with the first training target sequence.
plt.plot(audio_eval, color='blue', label='audio')
for i in Y_train_audio_eval[:1]:
    plt.plot(i)
plt.legend()
plt.show()

with tf.Session() as sess:
    # NOTE(review): the body of this session block was lost in the export; the
    # loss plots below appear to belong to it (they do not actually need the
    # session) — confirm against the original notebook.
    plt.plot(train_losses, label='Train')
    plt.plot(validation_losses, label='Validation')
    plt.title(model_name)
    plt.show()

# # Restoring model
model_name = f'Sparse_Develop'
sorted(os.listdir(DIRS['MODELS'] + model_name))
tf.reset_default_graph()
#saver = tf.train.import_meta_graph(DIRS['MODELS']+model_name+'/final.meta')
saver = tf.train.import_meta_graph(DIRS['MODELS'] + model_name + '/checkpoint-900.meta')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint(DIRS['MODELS'] + model_name))
    # Materialise the first 13 graph variables as numpy arrays so they can seed
    # a freshly built WaveGRU below.
    restored_variables = {x.name: x.eval(session=sess) for x in tf.global_variables()[:13]}
tf.reset_default_graph()
gru = WaveGRU(input_dimensions, hidden_size, variables_values_dict=restored_variables)
X = sl.load_data(wav_fnames, 3)

# # Sound generation
with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    gen_to_wav = gru.generate_sound(num_pieces=1, n_seconds=2, session=sess, sample_rate=M_PARAMS['SAMPLE_RATE'])

with tf.Session() as sess:
    sess.run(init_variables)
    #plt.plot(audio.eval(session=sess), label='real')
    plt.plot(gen_to_wav[0].eval(session=sess), label='generated')
    # Reference sine wave for visual comparison with the generated signal.
    plt.plot(np.int32([np.sin(x / 1000) * 16000 + 32256 for x in range(gen_to_wav.shape[1])]))

with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    O1, O2, O3, O4 = sess.run([gru.O1, gru.O2, gru.O3, gru.O4])
    Iu, Ir, Ie = sess.run([gru.Iu, gru.Ir, gru.Ie])
    Ru, Rr, Re = sess.run([gru.Ru, gru.Rr, gru.Re])

# NOTE(review): this loop is truncated in the export — only the subplot call of
# its body survives here; the rest of the cell is elsewhere in the file.
for idx, m in enumerate([O1, O2, O3, O4]):
    plt.subplot(1, 4, idx + 1)
# In[13]: path of one VoxForge recording (without extension).
fname = DIRS['RAW_DATA'] + '/rus/voxforge_ru/0/00/78d77cdb75be'

# In[14]:
os.path.isfile(fname + '.wav')

# In[120]: decode the wav into an array (relies on an active session `sess`
# from an earlier cell).
audio_data = sl.load_data([fname + '.wav']).eval(session=sess)
audio_data

# In[127]:
audio_data.shape
# Pad the tail with ones and prepend two 30-sample frames of -1.
# NOTE(review): the pad length 30-1-(len%30)+30 overshoots a plain multiple of
# 30 by design or by off-by-one — confirm intended framing.
audio_data = np.concatenate([audio_data, np.ones((1, 30 - 1 - int(audio_data.shape[1] % 30) + 30, 2))], 1)
audio_data = np.concatenate([-np.ones((1, 30 * 2, 2)), audio_data], 1)

n_batches = 30
# Each pass shifts the sequence by one step and appends the previous last two
# channels, widening the channel axis — presumably building lagged context
# features; confirm against the consuming model.
for i in range(n_batches - 1):
    audio_data = np.concatenate([audio_data[:, 1:, :], audio_data[:, :-1, -2:]], 2)
audio_data.shape

audio_data_resh = audio_data.reshape((1, 30, -1, 2))
audio_data_resh.shape

# # New model with new sparse
# In[163]:
# In[8]: build a fresh WaveGRU graph.
tf.reset_default_graph()
gru = WaveGRU(input_dimensions, hidden_size)

# In[9]:
init_variables = tf.global_variables_initializer()

# In[10]:
saver = tf.train.Saver()

# In[11]: load three wav files worth of training data.
X = sl.load_data(wav_fnames, 3)

# In[12]: training hyper-parameters.
batch_size = 10
truncated_len = M_PARAMS['SAMPLE_RATE'] // 128
total_series_length = int(X.shape[1])
num_epochs = 50  #total_series_length//batch_size//truncated_len
print(batch_size, truncated_len, total_series_length, num_epochs)

# In[13]: early-stopping patience (defined here; consumer not visible in this chunk).
n_early_stopping = 50

# In[13]:
# Map NUL, space, and each letter of the Russian alphabet to its index.
char_to_int = {ch: pos for pos, ch in enumerate('\0 абвгдежзийклмнопрстуфхцчшщъыьэюяё')}

# In[70]: take the first (wav, txt, align) file triple from the text dataset.
fname_wav, fname_txt, fname_align = text_ds.loc[0, ['wav', 'txt', 'align']]
fname_wav

# In[71]:
data = sl.load_data([fname_wav])

# In[72]: one-hot encoding of the alignment file.
oh = sl.load_text_oh([fname_align])

# In[73]: smoke-test the train/test split with text conditioning.
sl.get_train_test(data, batch_size=10, truncated_len=100, text_oh=oh)

# In[65]: