Beispiel #1
0
    def test_load_audio(self):
        """Loading a batch of five indexes returns five x items and five y items."""
        expected_count = 5
        raw_inputs, raw_targets, _ = load_audio(
            self.df, indexes_in_batch=np.arange(expected_count))

        # Both returned collections must contain one entry per requested index.
        for loaded in (raw_inputs, raw_targets):
            self.assertEqual(len(loaded), expected_count)
Beispiel #2
0
    def test_extract_features_and_pad(self):
        """Check the padded feature batch shape and the reported input lengths.

        For a batch of five items the feature array is expected to have shape
        (5, 382, 26), one input length must be reported per item, and no
        reported length may exceed the padded size of 382.
        """
        indexes = np.arange(5)
        x_data_raw, _, sr = load_audio(self.df, indexes_in_batch=indexes)
        x_data, input_length = self.dg.extract_features_and_pad(x_data_raw, sr)

        self.assertEqual(x_data.shape, (5, 382, 26))
        self.assertEqual(len(input_length), 5)
        # `all(input_length)` collapses to a single bool, so comparing that to
        # 382 could never fail; compare every length against the bound instead.
        self.assertTrue(np.all(np.asarray(input_length) <= 382))
Beispiel #3
0
    def test_convert_transcripts(self):
        """Check the converted transcript of item 0 against a known encoding.

        The first converted transcript must equal the 30 expected values
        below, and the reported length must be 30.
        """
        _, y_data_raw, _ = load_audio(self.df, indexes_in_batch=[0])
        transcript, y_length = convert_and_pad_transcripts(y_data_raw)
        expected = [
            23., 5., 18., 5., 0., 9., 0., 2., 21., 20., 0., 1., 12., 18., 5.,
            1., 4., 25., 0., 15., 14., 0., 20., 8., 5., 0., 3., 1., 18., 20.
        ]

        # Named `actual` rather than `list` so the builtin is not shadowed.
        actual = transcript[0].tolist()
        self.assertListEqual(actual, expected)
        self.assertEqual(y_length, 30)
Beispiel #4
0
    def test_extract_mel_spec(self):
        """Mel spectrogram of item 0 is padded to (500, 40) with a length of 256."""
        raw_batch, _, sample_rate = load_audio(self.df, indexes_in_batch=[0])

        extraction_args = dict(
            sr=sample_rate,
            max_pad_length=500,
            frame_length=320,
            hop_length=160,
            n_mels=40,
        )
        padded_spec, unpadded_length = extract_mel_spectrogram_and_pad(
            raw_batch[0], **extraction_args)

        self.assertTupleEqual(padded_spec.shape, (500, 40))
        self.assertEqual(unpadded_length, 256)
Beispiel #5
0
    def test_extract_mfcc(self):
        """MFCC output of item 0 is padded to (500, 26) with a length of 256."""
        raw_batch, _, sample_rate = load_audio(self.df, indexes_in_batch=[0])

        extraction_args = dict(
            sr=sample_rate,
            max_pad_length=500,
            frame_length=320,
            hop_length=160,
            mfcc_features=26,
            n_mels=40,
        )
        padded_mfcc, unpadded_length = extract_mfcc_and_pad(
            raw_batch[0], **extraction_args)

        self.assertTupleEqual(padded_mfcc.shape, (500, 26))
        self.assertEqual(unpadded_length, 256)
Beispiel #6
0
    def __getitem__(self, batch_index):
        """
        Generates a batch of correctly shaped X and Y data

        :param batch_index: index of the batch to generate
        :return: tuple (inputs, outputs) where inputs is a dictionary containing:
                'the_input':     np.ndarray[shape=(batch_size, max_seq_length, mfcc_features)]: input audio data
                'the_labels':    np.ndarray[shape=(batch_size, max_transcript_length)]: transcription data
                'input_length':  np.ndarray[shape=(batch_size, 1)]: length of each sequence (numb of frames) in x_data
                'label_length':  np.ndarray[shape=(batch_size, 1)]: length of each sequence (numb of letters) in y_data
                 and outputs is a dictionary containing:
                'ctc':           np.ndarray[shape=(n,)]: zeros used as dummy data for the dummy loss function,
                                 where n is the number of indexes selected for this batch

        """

        # Select the slice of indexes that belongs to this batch
        batch_start = batch_index * self.batch_size
        batch_end = batch_start + self.batch_size
        indexes_in_batch = self.indexes[batch_start:batch_end]

        # Shuffle indexes within current batch if shuffle=true
        # NOTE(review): if self.indexes is a NumPy array the slice is a view, so
        # an in-place shuffle would also reorder self.indexes - confirm intended.
        if self.shuffle:
            shuf(indexes_in_batch)

        # Load audio and transcripts
        x_data_raw, y_data_raw, sr = load_audio(self.df, indexes_in_batch)

        # Preprocess and pad data
        x_data, input_length = self.extract_features_and_pad(x_data_raw, sr)
        y_data, label_length = convert_and_pad_transcripts(y_data_raw)

        inputs = {
            'the_input': x_data,
            'the_labels': y_data,
            'input_length': input_length,
            'label_length': label_length
        }

        # Dummy target for the dummy loss function. It is sized by the indexes
        # actually selected, so a final batch shorter than batch_size still gets
        # a target of matching length.
        outputs = {
            'ctc': np.zeros([len(indexes_in_batch)])
        }

        return inputs, outputs
Beispiel #7
0
    def test_get_seq_size(self):
        """get_seq_size reports 256 for the first loaded item."""
        raw_batch, _, sample_rate = load_audio(self.df, indexes_in_batch=[0])
        first_item = raw_batch[0]

        self.assertEqual(self.dg.get_seq_size(first_item, sample_rate), 256)