Example #1
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the corresponding feature
     Params:
         audio_clip (str): Path to the audio clip
     """
     if self.spectrogram:
         return spectrogram_from_file(
             audio_clip, step=self.step, window=self.window,
             max_freq=self.max_freq)
     else:
         (rate, sig) = wav.read(audio_clip)
         # Compute PNCC (power-normalized cepstral coefficient) features
         pncc_array = pncc(sig)
         return pncc_array
Example #2
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the log of its Fourier Transform
     Params:
         audio_clip (str): Path to the audio clip
     """
     return spectrogram_from_file(
         audio_clip, step=self.step, window=self.window,
         max_freq=self.max_freq)
Example #3
 def featurize(self, audio_clip):
     if self.spectrogram:
         return spectrogram_from_file(audio_clip,
                                      step=self.step,
                                      window=self.window,
                                      max_freq=self.max_freq)
     else:
         (rate, sig) = wav.read(audio_clip)
         return mfcc(sig, rate, numcep=self.mfcc_dim)
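Examples #1-#4 all assume an object that carries the featurization settings (spectrogram, step, window, max_freq, mfcc_dim) as attributes. As a rough illustration of how such features are typically consumed downstream, here is a hypothetical standalone driver; the featurize_mfcc helper, the clip paths, and the padding scheme are illustrative assumptions, not taken from any of the examples, and the mfcc import assumes the examples use python_speech_features:

import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc  # assumed source of the examples' mfcc()

def featurize_mfcc(audio_clip, mfcc_dim=13):
    # Standalone version of the MFCC branch in Example #3
    (rate, sig) = wav.read(audio_clip)
    return mfcc(sig, rate, numcep=mfcc_dim)

clips = ['audio/example1.wav', 'audio/example2.wav']  # illustrative paths
feats = [featurize_mfcc(c) for c in clips]            # each: (frames, mfcc_dim)
lengths = [f.shape[0] for f in feats]                 # true frame count per clip

# Zero-pad to the longest clip so the batch stacks into a single array
batch = np.zeros((len(feats), max(lengths), feats[0].shape[1]))
for i, f in enumerate(feats):
    batch[i, :f.shape[0], :] = f

Keeping the true lengths alongside the padded batch matters because CTC-style training (see Example #9 below) needs the unpadded sequence lengths.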
Example #4
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the corresponding feature
     Params:
         audio_clip (str): Path to the audio clip
     """
     if self.spectrogram:
         return spectrogram_from_file(
             audio_clip, step=self.step, window=self.window,
             max_freq=self.max_freq)
     else:
         (rate, sig) = wav.read(audio_clip)
         return mfcc(sig, rate, numcep=self.mfcc_dim)
Example #5
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the corresponding feature
     Params:
         audio_clip (str): Path to the audio clip
     """
     if self.spectrogram:
         return spectrogram_from_file(
             audio_clip, step=self.step, window=self.window,
             max_freq=self.max_freq)
     else:
         (rate, sig) = wav.read(audio_clip)
         return mfcc(sig, rate, numcep=self.mfcc_dim)
Example #6
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the corresponding feature
     Params:
         audio_clip (str): Path to the audio clip
     """
     if self.spectrogram:
         return spectrogram_from_file(audio_clip,
                                      step=self.step,
                                      window=self.window,
                                      max_freq=self.max_freq)
     else:
         return mfcc_from_file(filename=audio_clip, mfcc_dim=self.mfcc_dim)
Example #7
 def featurize(self, audio_clip, target=False):
     """ For a given audio clip, calculate the log of its Fourier Transform
     Params:
         audio_clip (str): Path to the audio clip
         target (bool): When True, no padding is applied
     """
     if target:
         pad = 0
     else:
         pad = self.pad
     # This variant of spectrogram_from_file returns a (spectrogram, phase)
     # pair; the trailing [0] keeps only the spectrogram (Example #13 below
     # unpacks both as spec, phase)
     return spectrogram_from_file(audio_clip,
                                  step=self.step,
                                  window=self.window,
                                  max_freq=self.max_freq,
                                  pad=pad,
                                  log=self.use_log)[0]
Example #8
 def featurize(self, audio_clip):
     """ For a given audio clip, calculate the corresponding feature
     Params:
         audio_clip (str): Path to the audio clip
     """
     if self.spectrogram and not self.raw:
         return spectrogram_from_file(audio_clip,
                                      step=self.step,
                                      window=self.window,
                                      max_freq=self.max_freq)
     else:
         (rate, sig) = wav.read(audio_clip)
         if self.raw:
             # Raw waveform as a (num_samples, 1) column vector
             return sig.reshape(-1, 1)
         return mfcc(sig, rate, numcep=self.mfcc_dim)
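With self.raw set, the feature is just the waveform reshaped into a (num_samples, 1) column, so the time axis is far longer than for spectrogram or MFCC features. A quick hypothetical shape check (the clip path is illustrative):

import scipy.io.wavfile as wav

rate, sig = wav.read('audio/example.wav')  # illustrative path
raw_feat = sig.reshape(-1, 1)
# At a 16 kHz sampling rate, one second of audio becomes 16000 time
# steps here, versus roughly 100 frames for a 10 ms-step spectrogram.
print(rate, raw_feat.shape)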
Example #9
num_hidden = 50
num_layers = 1
batch_size = 1
initial_learning_rate = 1e-2
momentum = 0.9

num_examples = 1
num_batches_per_epoch = int(num_examples / batch_size)

# Loading the data
audio_filename = maybe_download('LDC93S1.wav', 93638)
target_filename = maybe_download('LDC93S1.txt', 62)

inputs = spectrogram_from_file(audio_filename,
                               step=10,
                               window=20,
                               max_freq=8000,
                               eps=1e-14)

# Transform into a 3D array (add a batch dimension)
train_inputs = np.asarray(inputs[np.newaxis, :])
train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
train_seq_len = [train_inputs.shape[1]]
print(train_seq_len)

# Read the target transcript
with open(target_filename, 'r') as f:
    # Only the last line is necessary
    line = f.readlines()[-1]
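The example breaks off here. In the well-known TensorFlow CTC demo that this snippet follows (the LDC93S1 sample), the transcript is next lowercased, split into characters, and mapped to integer labels for the CTC loss. A sketch of that continuation; the SPACE_TOKEN/FIRST_INDEX constants are assumptions modeled on that demo, not shown in this excerpt:

# Hypothetical continuation, modeled on the standard TensorFlow CTC demo
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # maps 'a'..'z' to 1..26

# The LDC93S1 transcript line starts with two sample-offset fields;
# keep only the words and drop the trailing period
original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')

# Character-level targets, with spaces kept as an explicit token
targets = original.replace(' ', '  ').split(' ')
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])
train_targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN
                            else ord(x) - FIRST_INDEX for x in targets])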
Example #10
def featurize(audio_clip, step=10, window=20, max_freq=22050, desc_file=None):
    return spectrogram_from_file(
        audio_clip, mode=ModelMode.TEST, step=step, window=window,
        max_freq=max_freq)
Example #11
# from sample_models import own_model
# from train_utils import train_my_model
# from keras.backend.tensorflow_backend import set_session
# from utils import spectrogram_from_file, mfcc_from_file
# import tensorflow as tf
# allocate 50% of GPU memory (if you like, feel free to change this)
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
# set_session(tf.Session(config=config))

# train
# model = own_model(input_dim=161, output_dim=29)
# train_my_model(model, pickle_path='own_model_loss.pickle', save_model_path='own_model.h5')
import numpy as np
from utils import spectrogram_from_file, mfcc_from_file

s = spectrogram_from_file('audio/example.wav', 10, 20, 8000)
print(s)
# print(np.isnan(s))
# print(s.shape)
# mfcc = mfcc_from_file('audio/example.wav', 13)
# print(mfcc.shape)
# print(mfcc[0:2,:])
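A small consistency check worth noting: the commented-out training call above uses input_dim=161, which should match the feature width of the live spectrogram call, since a 20 ms window at a 16 kHz sampling rate gives a 320-point FFT and hence 161 frequency bins up to max_freq=8000. A hypothetical assertion to verify this:

# Assumes a 16 kHz clip; 20 ms window -> 320-point FFT -> 161 bins
assert s.shape[1] == 161, s.shape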
Example #12
    def load_metadata_from_desc_file(self, desc_file, partition='train',
                                     max_duration=10.0):
        """ Read metadata from the description file
            (possibly takes long, depending on the filesize)
        Params:
            desc_file (str):  Path to a JSON-line file that contains labels and
                paths to the audio files
            partition (str): One of 'train', 'validation' or 'test'
            max_duration (float): In seconds, the maximum duration of
                utterances to train or test on
        """
        logger.info('Reading description file: {} for partition: {}'
                    .format(desc_file, partition))
        audio_paths, durations, texts, arpabets = [], [], [], []
        with open(desc_file, encoding='utf-8') as json_line_file:
            for line_num, json_line in enumerate(json_line_file):
                try:
                    spec = json.loads(json_line)
                    if float(spec['duration']) > max_duration:
                        continue
                    textlen = len(text_to_int_sequence(text_normalize(spec['text'])))
                    speclen = len(spectrogram_from_file(spec['key']))
                    if textlen > speclen:
                        print('label > feats; ignoring sentence')
                        continue
                    if textlen < 2:
                        print('small label; ignoring sentence')
                        continue
                    audio_paths.append(spec['key'])
                    durations.append(float(spec['duration']))
                    texts.append(spec['text'])
                    if self.use_arpabets:
                        arpabets.append(spec['arpabet'])
                except Exception as e:
                    # Change to (KeyError, ValueError) or
                    # (KeyError,json.decoder.JSONDecodeError), depending on
                    # json module version
                    logger.warning('Error reading line #{}: {}'
                                   .format(line_num, json_line))
                    logger.warning(str(e))

        if not self.use_arpabets:
            arpabets = [''] * len(audio_paths)

        if partition == 'train':
            self.train_audio_paths = audio_paths
            self.train_durations = durations
            self.train_texts = texts
            self.train_arpabets = arpabets
        elif partition == 'validation':
            self.val_audio_paths = audio_paths
            self.val_durations = durations
            self.val_texts = texts
            self.val_arpabets = arpabets
        elif partition == 'test':
            self.test_audio_paths = audio_paths
            self.test_durations = durations
            self.test_texts = texts
            self.test_arpabets = arpabets
        else:
            raise Exception("Invalid partition to load metadata. "
                            "Must be train/validation/test")
Example #13
savedir = os.path.join(os.path.dirname(model_path),
                       'samples_{}ep'.format(best_epoch))
if not os.path.isdir(savedir):
    os.mkdir(savedir)

model = train_loop_best.model.cpu()
if hasattr(model, 'blocks'):
    for block in model.blocks:
        block.rnn.flatten_parameters()

print('Generating samples...')
for k, batch in tqdm(enumerate(test_loader)):
    f = G_test.audio_paths[k]
    spec, phase = spectrogram_from_file(f,
                                        window=window_size,
                                        step=step_size,
                                        log=args.use_log)
    #ref, phase_ref = spectrogram_from_file(f, window=window, step=step)

    with torch.no_grad():
        Y_hat, test_loss, layer_outputs = test_fn_all_layers(
            model, train_loop_best.criterion, batch)

    # Visualize layer outputs depending on model
    if isinstance(model, HighwayModel):
        # need to plot both output and gate
        hiddens, masks = layer_outputs
        n_layers = len(hiddens)
        if n_layers > 1:
            fig, axes = plt.subplots(4, n_layers)
        else: