# Example #1
def train_model(model_class, input_dimensions, hidden_size, batch_size, truncated_len, num_epochs, model_name,
                print_period=50, save_period=50, log_period=50, n_files_per_epoch=5,
                sparsify_epochs=(), sparsity_level=1, wav_fnames=None, align_fnames=None):
    """Train an instance of `model_class` on wav data, logging losses and checkpointing.

    Parameters
    ----------
    model_class : callable
        Constructor taking (input_dimensions, hidden_size). The instance must
        provide train(); when `sparsify_epochs` is non-empty it must also
        provide calc_sparsity_level() and sparsify().
    input_dimensions, hidden_size
        Model architecture parameters, forwarded to `model_class`.
    batch_size, truncated_len : int
        Shape of each training batch drawn via sl.get_train_test.
    num_epochs : int
        Number of training iterations.
    model_name : str
        Checkpoint subdirectory created under DIRS['MODELS'].
    print_period, save_period : int
        Iteration intervals for console/plot updates and checkpoint saves.
    log_period : int
        Currently unused; kept for backward compatibility with callers.
    n_files_per_epoch : int
        Number of wav files loaded each time the in-memory data is refreshed.
    sparsify_epochs : collection of int
        Epochs at which the model's weights are sparsified (default: none).
        Changed from a mutable `[]` default; only membership is tested, so
        behavior is unchanged.
    sparsity_level
        Target sparsity forwarded to the model.
    wav_fnames : iterable of paths, optional
        Defaults to every *.wav under DIRS['RAW_DATA'].
    align_fnames : iterable of paths, optional
        Alignment files for text one-hot conditioning; when omitted,
        sl.get_train_test receives text_oh=None.

    Returns
    -------
    (train_losses, validation_losses, model)
    """
    if model_name not in os.listdir(DIRS['MODELS']):
        os.mkdir(DIRS['MODELS'] + model_name)

    tf.reset_default_graph()
    model = model_class(input_dimensions, hidden_size)
    init_variables = tf.global_variables_initializer()
    saver = tf.train.Saver()
    if wav_fnames is None:
        wav_fnames = Path(DIRS['RAW_DATA']).rglob("*.wav")

    # Number of iterations the currently loaded files can still serve.
    epochs_per_files_last = 0

    train_losses = []
    validation_losses = []

    # BUGFIX: `toh` was passed to sl.get_train_test unconditionally but only
    # assigned when align_fnames was given — NameError otherwise.
    toh = None
    # BUGFIX: `msg` was referenced after the loop; unbound when num_epochs == 0.
    msg = ''

    with tf.Session() as sess:
        sess.run(init_variables)
        # Perform all the iterations
        for epoch in tqdm_notebook(range(num_epochs)):
            # Refresh in-memory data once the current files are exhausted.
            if epochs_per_files_last == 0:
                X = sl.load_data(wav_fnames, n_files_per_epoch)
                if align_fnames is not None:
                    toh = sl.load_text_oh(align_fnames, n_files_per_epoch)
                total_series_length = int(X.shape[1])
                epochs_per_files_last = total_series_length // batch_size // truncated_len
            epochs_per_files_last -= 1

            if epoch in sparsify_epochs:
                k = model.calc_sparsity_level(epoch, sparsify_epochs, sparsity_level)
                model.sparsify(k, sess)

            X_train, Y_train, X_test, Y_test = sl.get_train_test(X, batch_size, truncated_len, sess, text_oh=toh)
            train_loss, validation_loss = model.train(X_train, Y_train, X_test, Y_test, sess)

            # Log the losses
            train_losses.append(train_loss)
            validation_losses.append(validation_loss)

            msg = f'Iteration: {epoch}, train loss: {train_loss:.4f}, val loss: {validation_loss:.4f}'
            # Periodic console + plot update (duplicate identical `if`s merged).
            if epoch % print_period == 0 and epoch != 0:
                print(msg)
                sl.plot_losses(train_losses, validation_losses, title=msg)
                plt.show()
            if epoch % save_period == 0:
                saver.save(sess, DIRS['MODELS'] + model_name + '/checkpoint',
                           global_step=epoch, write_meta_graph=True)

        sl.plot_losses(train_losses, validation_losses, title=msg)
        plt.show()

        saver.save(sess, DIRS['MODELS'] + model_name + '/final')

    return train_losses, validation_losses, model
def train_model(input_dimensions, hidden_size, batch_size, truncated_len, num_epochs, model_name,
                print_period=50, save_period=50, log_period=50):
    """Train a WaveGRU on every *.wav under DIRS['RAW_DATA'], checkpointing as it goes.

    A fresh graph and WaveGRU(input_dimensions, hidden_size) are built, then
    `num_epochs` iterations are run; data is reloaded (5 files at a time) once
    the current files have been consumed. Losses are plotted/printed every
    `print_period` iterations and checkpoints saved every `save_period`.
    `log_period` is accepted for interface compatibility but not used here.

    Returns (train_losses, validation_losses, model).
    """
    if model_name not in os.listdir(DIRS['MODELS']):
        os.mkdir(DIRS['MODELS'] + model_name)

    tf.reset_default_graph()
    model = WaveGRU(input_dimensions, hidden_size)
    init_variables = tf.global_variables_initializer()
    saver = tf.train.Saver()
    wav_fnames = Path(DIRS['RAW_DATA']).rglob("*.wav")

    # How many more iterations the currently loaded audio can serve.
    iterations_until_reload = 0

    train_losses, validation_losses = [], []

    with tf.Session() as sess:
        sess.run(init_variables)

        for epoch in tqdm_notebook(range(num_epochs)):
            # Pull in the next batch of files once the current ones run out.
            if iterations_until_reload == 0:
                X = sl.load_data(wav_fnames, 5)
                total_series_length = int(X.shape[1])
                iterations_until_reload = total_series_length // batch_size // truncated_len
            iterations_until_reload -= 1

            X_train, Y_train, X_test, Y_test = sl.get_train_test(X, batch_size, truncated_len, sess)
            train_loss, validation_loss = model.train(X_train, Y_train, X_test, Y_test, sess)

            train_losses.append(train_loss)
            validation_losses.append(validation_loss)

            # Progress report every `print_period` iterations (not at epoch 0).
            is_report_epoch = (epoch % print_period == 0) and (epoch != 0)
            if is_report_epoch:
                status = f'Iteration: {epoch}, train loss: {train_loss:.4f}, val loss: {validation_loss:.4f}'
                print(status)
                sl.plot_losses(train_losses, validation_losses, title=status)
                plt.show()
            if epoch % save_period == 0:
                saver.save(sess, DIRS['MODELS'] + model_name + '/checkpoint',
                           global_step=epoch, write_meta_graph=True)

        sl.plot_losses(train_losses, validation_losses,
                     title='Iteration: %d, train loss: %.4f, test loss: %.4f' % (epoch, train_loss, validation_loss))
        plt.show()

        saver.save(sess, DIRS['MODELS'] + model_name + '/final')

    return train_losses, validation_losses, model
# Example #3
# Inspection script: plot the GRU's recurrent (R*) and input (I*) weight
# matrices, then evaluate a single sample's audio and training batches.
# BUGFIX: several statements were fused onto single lines during extraction
# (invalid syntax); they are split back into separate statements below.
plt.tight_layout()
plt.show()

# Recurrent weight matrices as images.
for idx, m in enumerate([Ru, Rr, Re]):
    plt.subplot(1, 3, idx + 1)
    plt.imshow(m)
    plt.title(['Ru', 'Rr', 'Re'][idx])
plt.tight_layout()
plt.show()

# Input weight matrices as heatmaps.
for idx, m in enumerate([Iu, Ir, Ie]):
    plt.subplot(1, 3, idx + 1)
    sns.heatmap(m)
    plt.title(['Iu', 'Ir', 'Ie'][idx])
plt.tight_layout()
plt.show()

# Load one sample both as raw audio and as model input data.
audio = sl.load_audio_not_one_hot(DIRS['RAW_DATA']+'cv_corpus_v1/cv-other-train/sample-052026.wav')
X = sl.load_data([DIRS['RAW_DATA']+'cv_corpus_v1/cv-other-train/sample-052026.wav'])

with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    O1, O2, O3, O4 = sess.run([gru.O1, gru.O2, gru.O3, gru.O4])
    Iu, Ir, Ie = sess.run([gru.Iu, gru.Ir, gru.Ie])
    Ru, Rr, Re = sess.run([gru.Ru, gru.Rr, gru.Re])
    audio_eval = sess.run(audio)
    X_eval = sess.run(X)
    X_train, Y_train, X_test, Y_test = sl.get_train_test(X, 10, 5000)
    # Recombine the two 8-bit halves of each sample into a 16-bit value.
    Y_train_audio = ((Y_train*128+128)[:,:,0])*256+(Y_train*128+128)[:,:,1]
    Y_train_audio_eval = sess.run(Y_train_audio)
    os.listdir(DIRS['RAW_DATA']+'cv_corpus_v1/cv-other-train')

# Overlay the real audio with the first reconstructed training sequence.
plt.plot(audio_eval, color='blue', label='audio')
for i in Y_train_audio_eval[:1]:
    plt.plot(i)
plt.legend()
plt.show()

with tf.Session() as sess:
    pass  # NOTE(review): body of this session block was lost in extraction — recover from the original notebook.
# Example #4
# Script: plot training curves, restore a saved model, generate audio, and set
# up a fresh WaveGRU for the next experiment.
# BUGFIX: many statements were fused onto single lines during extraction
# (invalid syntax); they are split back into separate statements below.
plt.plot(train_losses, label='Train')
plt.plot(validation_losses, label='Validation')
plt.title(model_name)
plt.show()


# # Restoring model
model_name = 'Sparse_Develop'  # was a pointless f-string with no placeholders
sorted(os.listdir(DIRS['MODELS']+model_name))  # notebook-cell residue: result displayed, not stored
tf.reset_default_graph()
#saver = tf.train.import_meta_graph(DIRS['MODELS']+model_name+'/final.meta')
saver = tf.train.import_meta_graph(DIRS['MODELS']+model_name+'/checkpoint-900.meta')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint(DIRS['MODELS']+model_name))
    # Snapshot the first 13 graph variables as numpy values for re-injection.
    restored_variables = {x.name: x.eval(session=sess) for x in tf.global_variables()[:13]}

tf.reset_default_graph()
gru = WaveGRU(input_dimensions, hidden_size, variables_values_dict=restored_variables)
X = sl.load_data(wav_fnames, 3)

# # Sound generation
with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    gen_to_wav = gru.generate_sound(num_pieces=1, n_seconds=2, session=sess, sample_rate=M_PARAMS['SAMPLE_RATE'])

with tf.Session() as sess:
    sess.run(init_variables)
    #plt.plot(audio.eval(session=sess), label='real')
    plt.plot(gen_to_wav[0].eval(session=sess), label='generated')
# Reference sine wave for visual comparison with the generated signal.
plt.plot(np.int32([np.sin(x/1000)*16000+32256 for x in range(gen_to_wav.shape[1])]))

with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    sess.run(init_variables)
    O1, O2, O3, O4 = sess.run([gru.O1, gru.O2, gru.O3, gru.O4])
    Iu, Ir, Ie = sess.run([gru.Iu, gru.Ir, gru.Ie])
    Ru, Rr, Re = sess.run([gru.Ru, gru.Rr, gru.Re])

for idx, m in enumerate([O1, O2, O3, O4]):
    plt.subplot(1, 4, idx + 1)
    # NOTE(review): the plotting call for each subplot was lost in extraction.


fname = DIRS['RAW_DATA']+'/rus/voxforge_ru/0/00/78d77cdb75be'

os.path.isfile(fname+'.wav')  # notebook-cell residue: existence check, result displayed

audio_data = sl.load_data([fname+'.wav']).eval(session=sess)
audio_data

audio_data.shape

# Pad on the right so the length is compatible with 30 batches, then prepend
# two batches worth of -1 "silence" markers.
audio_data = np.concatenate([audio_data, np.ones((1, 30-1-int(audio_data.shape[1]%30)+30, 2))], 1)
audio_data = np.concatenate([-np.ones((1, 30*2, 2)), audio_data], 1)
n_batches = 30
# Stack progressively shifted copies along the channel axis.
for i in range(n_batches-1):
    audio_data = np.concatenate([audio_data[:, 1:, :], audio_data[:, :-1, -2:]], 2)
audio_data.shape
audio_data_resh = audio_data.reshape((1, 30, -1, 2))
audio_data_resh.shape

# # New model with new sparse
tf.reset_default_graph()
gru = WaveGRU(input_dimensions, hidden_size)

init_variables = tf.global_variables_initializer()

saver = tf.train.Saver()

X = sl.load_data(wav_fnames, 3)

batch_size = 10
truncated_len = M_PARAMS['SAMPLE_RATE'] // 128
total_series_length = int(X.shape[1])
num_epochs = 50  #total_series_length//batch_size//truncated_len
print(batch_size, truncated_len, total_series_length, num_epochs)

n_early_stopping = 50
# Example #7

# Map each character — NUL, space, then the Russian lowercase alphabet
# (including 'ё') — to its index in the string.
char_to_int = {k: idx for idx,k in enumerate('\0 абвгдежзийклмнопрстуфхцчшщъыьэюяё')}


# In[70]:


# Paths for the first dataset row: waveform, transcript, and alignment file.
# NOTE(review): `text_ds` is defined outside this chunk — presumably a pandas
# DataFrame, given the .loc indexing; confirm against the full file.
fname_wav, fname_txt, fname_align = text_ds.loc[0,['wav','txt','align']]
fname_wav


# In[71]:


# Load the sample's audio via the project's `sl` helper module.
data = sl.load_data([fname_wav])


# In[72]:


# One-hot text encoding built from the alignment file.
oh = sl.load_text_oh([fname_align])


# In[73]:


# Build train/test batches conditioned on the text one-hot.
sl.get_train_test(data, batch_size=10, truncated_len=100, text_oh=oh)


# In[65]: